[PATCH] R600: Make sign_extend_inreg legal.
Tom Stellard
tom at stellard.net
Wed Apr 2 09:59:11 PDT 2014
On Tue, Apr 01, 2014 at 11:34:09AM -0700, Matt Arsenault wrote:
> I don't know why I didn't just do this before
>
> http://llvm-reviews.chandlerc.com/D3250
>
> Files:
> lib/Target/R600/AMDGPUISelLowering.cpp
> lib/Target/R600/EvergreenInstructions.td
> lib/Target/R600/SIISelLowering.cpp
> lib/Target/R600/SIInstructions.td
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -212,15 +212,21 @@
> setOperationAction(ISD::SELECT, VT, Expand);
> }
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
> + if (!Subtarget->hasBFE())
> + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
> +
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom);
> + if (!Subtarget->hasBFE())
> + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
> +
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom);
> + if (!Subtarget->hasBFE())
> + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
> +
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
>
> @@ -1029,81 +1035,22 @@
> MVT VT = Op.getSimpleValueType();
> MVT ScalarVT = VT.getScalarType();
>
> - unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits();
> - unsigned DestBits = ScalarVT.getSizeInBits();
> - unsigned BitsDiff = DestBits - SrcBits;
> -
> - if (!Subtarget->hasBFE())
> - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
> + if (!VT.isVector())
> + return SDValue();
>
> SDValue Src = Op.getOperand(0);
> - if (VT.isVector()) {
> - SDLoc DL(Op);
> - // Need to scalarize this, and revisit each of the scalars later.
> - // TODO: Don't scalarize on Evergreen?
> - unsigned NElts = VT.getVectorNumElements();
> - SmallVector<SDValue, 8> Args;
> - ExtractVectorElements(Src, DAG, Args, 0, NElts);
> -
> - SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
> - for (unsigned I = 0; I < NElts; ++I)
> - Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
> -
> - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
> - }
> -
> - if (SrcBits == 32) {
> - SDLoc DL(Op);
> -
> - // If the source is 32-bits, this is really half of a 2-register pair, and
> - // we need to discard the unused half of the pair.
> - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
> - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc);
> - }
> -
> - unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1;
> -
> - // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
> - // might not be worth the effort, and will need to expand to shifts when
> - // fixing SGPR copies.
> - if (SrcBits < 32 && DestBits <= 32) {
> - SDLoc DL(Op);
> - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
> -
> - if (DestBits != 32)
> - Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src);
> -
> - // FIXME: This should use TargetConstant, but that hits assertions for
> - // Evergreen.
> - SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT,
> - Op.getOperand(0), // Operand
> - DAG.getConstant(0, ExtVT), // Offset
> - DAG.getConstant(SrcBits, ExtVT)); // Width
> -
> - // Truncate to the original type if necessary.
> - if (ScalarVT == MVT::i32)
> - return Ext;
> - return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext);
> - }
> -
> - // For small types, extend to 32-bits first.
> - if (SrcBits < 32) {
> - SDLoc DL(Op);
> - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
> + SDLoc DL(Op);
>
> - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src);
> - SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32,
> - DL,
> - ExtVT,
> - TruncSrc, // Operand
> - DAG.getConstant(0, ExtVT), // Offset
> - DAG.getConstant(SrcBits, ExtVT)); // Width
> + // TODO: Don't scalarize on Evergreen?
> + unsigned NElts = VT.getVectorNumElements();
> + SmallVector<SDValue, 8> Args;
> + ExtractVectorElements(Src, DAG, Args, 0, NElts);
>
> - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32);
> - }
> + SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
> + for (unsigned I = 0; I < NElts; ++I)
> + Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
>
> - // For everything else, use the standard bitshift expansion.
> - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
> + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
> }
>
> //===----------------------------------------------------------------------===//
> Index: lib/Target/R600/EvergreenInstructions.td
> ===================================================================
> --- lib/Target/R600/EvergreenInstructions.td
> +++ lib/Target/R600/EvergreenInstructions.td
> @@ -286,6 +286,13 @@
> VecALU
> >;
>
> +def : Pat<(i32 (sext_inreg i32:$src, i1)),
> + (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>;
> +def : Pat<(i32 (sext_inreg i32:$src, i8)),
> + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>;
> +def : Pat<(i32 (sext_inreg i32:$src, i16)),
> + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
> +
> defm : BFIPatterns <BFI_INT_eg>;
>
> def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -147,9 +147,6 @@
> setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
> setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
>
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
> - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
> -
> setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
> setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -71,6 +71,7 @@
> def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
> [(set i32:$dst, (sext_inreg i32:$src0, i16))]
> >;
> +
> ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
> ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
> ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
> @@ -125,21 +126,6 @@
> >;
> */
>
> -// Handle sext_inreg in i64
> -def : Pat <
> - (i64 (sext_inreg i64:$src, i8)),
> - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> - (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> - (S_MOV_B32 -1), sub1)
> ->;
> -
> -def : Pat <
> - (i64 (sext_inreg i64:$src, i16)),
> - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> - (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> - (S_MOV_B32 -1), sub1)
> ->;
> -
> let isCompare = 1 in {
> def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
> def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
> @@ -2267,6 +2253,32 @@
> >;
>
> //===----------------------------------------------------------------------===//
> +// Conversion Patterns
> +//===----------------------------------------------------------------------===//
> +
> +def : Pat<(i32 (sext_inreg i32:$src, i1)),
> + (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
> +
> +// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
> +// might not be worth the effort, and will need to expand to shifts when
> +// fixing SGPR copies.
> +
> +// Handle sext_inreg in i64
> +def : Pat <
> + (i64 (sext_inreg i64:$src, i8)),
> + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> + (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> + (S_MOV_B32 -1), sub1)
> +>;
> +
> +def : Pat <
> + (i64 (sext_inreg i64:$src, i16)),
> + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
> + (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
> + (S_MOV_B32 -1), sub1)
> +>;
> +
I want to move away from using patterns with INSERT_SUBREG, because
it just adds another case we need to handle in the SIFixSGPRCopies pass.
Can you move these patterns into AMDILISelDAGToDAG.cpp and use REG_SEQUENCE
instead of INSERT_SUBREG + IMPLICIT_DEF?
-Tom
> +//===----------------------------------------------------------------------===//
> // Miscellaneous Patterns
> //===----------------------------------------------------------------------===//
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list