[PATCH] R600: Add target nodes for BFM and BFI

Tom Stellard tom at stellard.net
Mon Mar 31 06:15:02 PDT 2014


LGTM.

-Tom

On Sat, Mar 29, 2014 at 01:22:15PM -0700, Matt Arsenault wrote:
> Add intrinsics for BFE, BFI and BFM to help with writing tests
> 
> http://llvm-reviews.chandlerc.com/D3218
> 
> Files:
>   lib/Target/R600/AMDGPUISelLowering.cpp
>   lib/Target/R600/AMDGPUISelLowering.h
>   lib/Target/R600/AMDGPUInstrInfo.td
>   lib/Target/R600/AMDGPUIntrinsics.td
>   lib/Target/R600/EvergreenInstructions.td
>   lib/Target/R600/SIInstructions.td
>   test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
>   test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
>   test/CodeGen/R600/llvm.AMDGPU.bfi.ll
>   test/CodeGen/R600/llvm.AMDGPU.bfm.ll

> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -559,6 +559,30 @@
>      case AMDGPUIntrinsic::AMDGPU_umin:
>        return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
>                                                    Op.getOperand(2));
> +
> +    case AMDGPUIntrinsic::AMDGPU_bfe_i32:
> +      return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
> +                         Op.getOperand(1),
> +                         Op.getOperand(2),
> +                         Op.getOperand(3));
> +
> +    case AMDGPUIntrinsic::AMDGPU_bfe_u32:
> +      return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
> +                         Op.getOperand(1),
> +                         Op.getOperand(2),
> +                         Op.getOperand(3));
> +
> +    case AMDGPUIntrinsic::AMDGPU_bfi:
> +      return DAG.getNode(AMDGPUISD::BFI, DL, VT,
> +                         Op.getOperand(1),
> +                         Op.getOperand(2),
> +                         Op.getOperand(3));
> +
> +    case AMDGPUIntrinsic::AMDGPU_bfm:
> +      return DAG.getNode(AMDGPUISD::BFM, DL, VT,
> +                         Op.getOperand(1),
> +                         Op.getOperand(2));
> +
>      case AMDGPUIntrinsic::AMDIL_round_nearest:
>        return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
>    }
> @@ -1176,6 +1200,8 @@
>    NODE_NAME_CASE(UMIN)
>    NODE_NAME_CASE(BFE_U32)
>    NODE_NAME_CASE(BFE_I32)
> +  NODE_NAME_CASE(BFI)
> +  NODE_NAME_CASE(BFM)
>    NODE_NAME_CASE(URECIP)
>    NODE_NAME_CASE(DOT4)
>    NODE_NAME_CASE(EXPORT)
> Index: lib/Target/R600/AMDGPUISelLowering.h
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.h
> +++ lib/Target/R600/AMDGPUISelLowering.h
> @@ -186,6 +186,8 @@
>    DOT4,
>    BFE_U32, // Extract range of bits with zero extension to 32-bits.
>    BFE_I32, // Extract range of bits with sign extension to 32-bits.
> +  BFI, // (src0 & src1) | (~src0 & src2)
> +  BFM, // Build a bitfield mask: ((1 << src0) - 1) << src1.
>    TEXTURE_FETCH,
>    EXPORT,
>    CONST_ADDRESS,
> Index: lib/Target/R600/AMDGPUInstrInfo.td
> ===================================================================
> --- lib/Target/R600/AMDGPUInstrInfo.td
> +++ lib/Target/R600/AMDGPUInstrInfo.td
> @@ -89,4 +89,6 @@
>  
>  def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
>  def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
> +def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
> +def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
>  
> Index: lib/Target/R600/AMDGPUIntrinsics.td
> ===================================================================
> --- lib/Target/R600/AMDGPUIntrinsics.td
> +++ lib/Target/R600/AMDGPUIntrinsics.td
> @@ -50,7 +50,10 @@
>    def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
>    def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
>    def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -
> +  def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
>    def int_AMDGPU_barrier_local  : Intrinsic<[], [], []>;
>  }
>  
> Index: lib/Target/R600/EvergreenInstructions.td
> ===================================================================
> --- lib/Target/R600/EvergreenInstructions.td
> +++ lib/Target/R600/EvergreenInstructions.td
> @@ -281,10 +281,18 @@
>  // XXX: This pattern is broken, disabling for now.  See comment in
>  // AMDGPUInstructions.td for more info.
>  //  def : BFEPattern <BFE_UINT_eg>;
> +def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
> +  [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
> +  VecALU
> +>;
>  
> -def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
>  defm : BFIPatterns <BFI_INT_eg>;
>  
> +def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
> +  [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
> +  VecALU
> +>;
> +
>  def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
>    [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
>  >;
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -989,7 +989,8 @@
>  
>  } // End isCommutable = 1
>  
> -defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
> +defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
> +  [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
>  defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
>  defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
>  defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
> @@ -1064,7 +1065,8 @@
>    [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
>  }
>  
> -def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
> +def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
> +  [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
>  defm : BFIPatterns <V_BFI_B32>;
>  def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
>    [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
> Index: test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
> @@ -0,0 +1,40 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfe_i32_arg_arg_arg
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
> +  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
> +  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfe_i32_arg_arg_imm
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> +  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
> +  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfe_i32_arg_imm_arg
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
> +  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
> +  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfe_i32_imm_arg_arg
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
> +  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
> +  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> Index: test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> @@ -0,0 +1,40 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfe_u32_arg_arg_arg
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
> +  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
> +  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfe_u32_arg_arg_imm
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> +  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
> +  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfe_u32_arg_imm_arg
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
> +  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
> +  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfe_u32_imm_arg_arg
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
> +  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
> +  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> Index: test/CodeGen/R600/llvm.AMDGPU.bfi.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfi.ll
> @@ -0,0 +1,41 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfi_arg_arg_arg
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
> +  %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
> +  store i32 %bfi, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfi_arg_arg_imm
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> +  %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 123) nounwind readnone
> +  store i32 %bfi, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfi_arg_imm_arg
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
> +  %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 123, i32 %src2) nounwind readnone
> +  store i32 %bfi, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfi_imm_arg_arg
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
> +  %bfi = call i32 @llvm.AMDGPU.bfi(i32 123, i32 %src1, i32 %src2) nounwind readnone
> +  store i32 %bfi, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> Index: test/CodeGen/R600/llvm.AMDGPU.bfm.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfm.ll
> @@ -0,0 +1,40 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfm_arg_arg
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> +  %bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
> +  store i32 %bfm, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfm_arg_imm
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
> +  %bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
> +  store i32 %bfm, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfm_imm_arg
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
> +  %bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
> +  store i32 %bfm, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @bfm_imm_imm
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
> +  %bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone
> +  store i32 %bfm, i32 addrspace(1)* %out, align 4
> +  ret void
> +}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list