[PATCH] R600: Add target nodes for BFM and BFI
Tom Stellard
tom at stellard.net
Mon Mar 31 06:15:02 PDT 2014
LGTM.
-Tom
On Sat, Mar 29, 2014 at 01:22:15PM -0700, Matt Arsenault wrote:
> Add intrinsics for BFE, BFI and BFM to help with writing tests
>
> http://llvm-reviews.chandlerc.com/D3218
>
> Files:
> lib/Target/R600/AMDGPUISelLowering.cpp
> lib/Target/R600/AMDGPUISelLowering.h
> lib/Target/R600/AMDGPUInstrInfo.td
> lib/Target/R600/AMDGPUIntrinsics.td
> lib/Target/R600/EvergreenInstructions.td
> lib/Target/R600/SIInstructions.td
> test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
> test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> test/CodeGen/R600/llvm.AMDGPU.bfi.ll
> test/CodeGen/R600/llvm.AMDGPU.bfm.ll
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -559,6 +559,30 @@
> case AMDGPUIntrinsic::AMDGPU_umin:
> return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
> Op.getOperand(2));
> +
> + case AMDGPUIntrinsic::AMDGPU_bfe_i32:
> + return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
> + Op.getOperand(1),
> + Op.getOperand(2),
> + Op.getOperand(3));
> +
> + case AMDGPUIntrinsic::AMDGPU_bfe_u32:
> + return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
> + Op.getOperand(1),
> + Op.getOperand(2),
> + Op.getOperand(3));
> +
> + case AMDGPUIntrinsic::AMDGPU_bfi:
> + return DAG.getNode(AMDGPUISD::BFI, DL, VT,
> + Op.getOperand(1),
> + Op.getOperand(2),
> + Op.getOperand(3));
> +
> + case AMDGPUIntrinsic::AMDGPU_bfm:
> + return DAG.getNode(AMDGPUISD::BFM, DL, VT,
> + Op.getOperand(1),
> + Op.getOperand(2));
> +
> case AMDGPUIntrinsic::AMDIL_round_nearest:
> return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
> }
> @@ -1176,6 +1200,8 @@
> NODE_NAME_CASE(UMIN)
> NODE_NAME_CASE(BFE_U32)
> NODE_NAME_CASE(BFE_I32)
> + NODE_NAME_CASE(BFI)
> + NODE_NAME_CASE(BFM)
> NODE_NAME_CASE(URECIP)
> NODE_NAME_CASE(DOT4)
> NODE_NAME_CASE(EXPORT)
> Index: lib/Target/R600/AMDGPUISelLowering.h
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.h
> +++ lib/Target/R600/AMDGPUISelLowering.h
> @@ -186,6 +186,8 @@
> DOT4,
> BFE_U32, // Extract range of bits with zero extension to 32-bits.
> BFE_I32, // Extract range of bits with sign extension to 32-bits.
> + BFI, // (src0 & src1) | (~src0 & src2)
> + BFM, // Bitfield mask: ((1 << src0) - 1) << src1.
> TEXTURE_FETCH,
> EXPORT,
> CONST_ADDRESS,
> Index: lib/Target/R600/AMDGPUInstrInfo.td
> ===================================================================
> --- lib/Target/R600/AMDGPUInstrInfo.td
> +++ lib/Target/R600/AMDGPUInstrInfo.td
> @@ -89,4 +89,6 @@
>
> def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
> def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
> +def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
> +def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
>
> Index: lib/Target/R600/AMDGPUIntrinsics.td
> ===================================================================
> --- lib/Target/R600/AMDGPUIntrinsics.td
> +++ lib/Target/R600/AMDGPUIntrinsics.td
> @@ -50,7 +50,10 @@
> def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> -
> + def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> + def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> + def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> + def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
> }
>
> Index: lib/Target/R600/EvergreenInstructions.td
> ===================================================================
> --- lib/Target/R600/EvergreenInstructions.td
> +++ lib/Target/R600/EvergreenInstructions.td
> @@ -281,10 +281,18 @@
> // XXX: This pattern is broken, disabling for now. See comment in
> // AMDGPUInstructions.td for more info.
> // def : BFEPattern <BFE_UINT_eg>;
> +def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
> + [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
> + VecALU
> +>;
>
> -def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
> defm : BFIPatterns <BFI_INT_eg>;
>
> +def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
> + [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
> + VecALU
> +>;
> +
> def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
> [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
> >;
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -989,7 +989,8 @@
>
> } // End isCommutable = 1
>
> -defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
> +defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
> + [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
> defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
> defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
> defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
> @@ -1064,7 +1065,8 @@
> [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
> }
>
> -def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
> +def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
> + [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
> defm : BFIPatterns <V_BFI_B32>;
> def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
> [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
> Index: test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
> @@ -0,0 +1,40 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfe_i32_arg_arg_arg
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
> + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
> + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfe_i32_arg_arg_imm
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
> + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfe_i32_arg_imm_arg
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
> + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
> + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfe_i32_imm_arg_arg
> +; SI: V_BFE_I32
> +; EG: BFE_INT
> +define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
> + %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
> + store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> Index: test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
> @@ -0,0 +1,40 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfe_u32_arg_arg_arg
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
> + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
> + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfe_u32_arg_arg_imm
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
> + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfe_u32_arg_imm_arg
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
> + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
> + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfe_u32_imm_arg_arg
> +; SI: V_BFE_U32
> +; EG: BFE_UINT
> +define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
> + %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
> + store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> Index: test/CodeGen/R600/llvm.AMDGPU.bfi.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfi.ll
> @@ -0,0 +1,41 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfi_arg_arg_arg
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
> + %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
> + store i32 %bfi, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfi_arg_arg_imm
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> + %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 123) nounwind readnone
> + store i32 %bfi, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfi_arg_imm_arg
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
> + %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 123, i32 %src2) nounwind readnone
> + store i32 %bfi, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfi_imm_arg_arg
> +; SI: V_BFI_B32
> +; EG: BFI_INT
> +define void @bfi_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
> + %bfi = call i32 @llvm.AMDGPU.bfi(i32 123, i32 %src1, i32 %src2) nounwind readnone
> + store i32 %bfi, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> Index: test/CodeGen/R600/llvm.AMDGPU.bfm.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/llvm.AMDGPU.bfm.ll
> @@ -0,0 +1,40 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
> +
> +; FUNC-LABEL: @bfm_arg_arg
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
> + %bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
> + store i32 %bfm, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfm_arg_imm
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
> + %bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
> + store i32 %bfm, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfm_imm_arg
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
> + %bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
> + store i32 %bfm, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +; FUNC-LABEL: @bfm_imm_imm
> +; SI: V_BFM
> +; EG: BFM_INT
> +define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
> + %bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone
> + store i32 %bfm, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list