[PATCH] R600: Handle fcopysign

Tue Jun 10 10:33:08 PDT 2014

LGTM.

On Thu, Jun 05, 2014 at 09:20:58AM +0000, Matt Arsenault wrote:
> http://reviews.llvm.org/D4029
> 
> Files:
>   lib/Target/R600/AMDGPUISelLowering.cpp
>   lib/Target/R600/AMDGPUInstructions.td
>   lib/Target/R600/AMDGPUSubtarget.h
>   lib/Target/R600/EvergreenInstructions.td
>   lib/Target/R600/SIInstructions.td
>   test/CodeGen/R600/fcopysign.f32.ll
>   test/CodeGen/R600/fcopysign.f64.ll

> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -217,6 +217,12 @@
>    setOperationAction(ISD::FREM, MVT::f64, Custom);
>    setOperationAction(ISD::FRINT, MVT::f64, Custom);
>  
> +  if (!Subtarget->hasBFI()) {
> +    // Without BFI, expand fcopysign; with BFI it is a single instruction.
> +    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
> +    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
> +  }
> +
>    if (!Subtarget->hasBCNT(32))
>      setOperationAction(ISD::CTPOP, MVT::i32, Expand);
>  
> @@ -279,6 +285,7 @@
>      setOperationAction(ISD::FNEG, VT, Expand);
>      setOperationAction(ISD::SELECT, VT, Expand);
>      setOperationAction(ISD::VSELECT, VT, Expand);
> +    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
>    }
>  
>    setTargetDAGCombine(ISD::MUL);
> Index: lib/Target/R600/AMDGPUInstructions.td
> ===================================================================
> --- lib/Target/R600/AMDGPUInstructions.td
> +++ lib/Target/R600/AMDGPUInstructions.td
> @@ -365,7 +365,7 @@
>  
>  // BFI_INT patterns
>  
> -multiclass BFIPatterns <Instruction BFI_INT> {
> +multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
>  
>    // Definition from ISA doc:
>    // (y & x) | (z & ~x)
> @@ -381,6 +381,19 @@
>      (BFI_INT $x, $y, $z)
>    >;
>  
> +  def : Pat <
> +    (fcopysign f32:$src0, f32:$src1),
> +    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
> +  >;
> +
> +  def : Pat <
> +    (f64 (fcopysign f64:$src0, f64:$src1)),
> +      (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
> +      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
> +      (BFI_INT (LoadImm32 0x7fffffff),
> +               (i32 (EXTRACT_SUBREG $src0, sub1)),
> +               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
> +  >;
>  }
>  
>  // SHA-256 Ma patterns
> Index: lib/Target/R600/AMDGPUSubtarget.h
> ===================================================================
> --- lib/Target/R600/AMDGPUSubtarget.h
> +++ lib/Target/R600/AMDGPUSubtarget.h
> @@ -72,6 +72,10 @@
>      return (getGeneration() >= EVERGREEN);
>    }
>  
> +  bool hasBFI() const {
> +    return (getGeneration() >= EVERGREEN);
> +  }
> +
>    bool hasBFM() const {
>      return hasBFE();
>    }
> Index: lib/Target/R600/EvergreenInstructions.td
> ===================================================================
> --- lib/Target/R600/EvergreenInstructions.td
> +++ lib/Target/R600/EvergreenInstructions.td
> @@ -295,7 +295,7 @@
>  def : Pat<(i32 (sext_inreg i32:$src, i16)),
>    (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
>  
> -defm : BFIPatterns <BFI_INT_eg>;
> +defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
>  
>  def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
>    [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -2140,7 +2140,7 @@
>    (V_MUL_HI_I32 $src0, $src1, (i32 0))
>  >;
>  
> -defm : BFIPatterns <V_BFI_B32>;
> +defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
>  def : ROTRPattern <V_ALIGNBIT_B32>;
>  
>  /********** ======================= **********/
> Index: test/CodeGen/R600/fcopysign.f32.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/fcopysign.f32.ll
> @@ -0,0 +1,50 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
> +
> +
> +declare float @llvm.copysign.f32(float, float) nounwind readnone
> +declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
> +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
> +
> +; Try to identify arg based on higher address.
> +; FUNC-LABEL: @test_copysign_f32:
> +; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
> +; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
> +; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
> +; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
> +; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
> +; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
> +; SI: BUFFER_STORE_DWORD [[RESULT]],
> +; SI: S_ENDPGM
> +
> +; EG: BFI_INT
> +define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
> +  %result = call float @llvm.copysign.f32(float %mag, float %sign)
> +  store float %result, float addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +; FUNC-LABEL: @test_copysign_v2f32:
> +; SI: S_ENDPGM
> +
> +; EG: BFI_INT
> +; EG: BFI_INT
> +define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %mag, <2 x float> %sign) nounwind {
> +  %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign)
> +  store <2 x float> %result, <2 x float> addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @test_copysign_v4f32:
> +; SI: S_ENDPGM
> +
> +; EG: BFI_INT
> +; EG: BFI_INT
> +; EG: BFI_INT
> +; EG: BFI_INT
> +define void @test_copysign_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %mag, <4 x float> %sign) nounwind {
> +  %result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign)
> +  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
> +  ret void
> +}
> +
> Index: test/CodeGen/R600/fcopysign.f64.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/fcopysign.f64.ll
> @@ -0,0 +1,37 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +
> +declare double @llvm.copysign.f64(double, double) nounwind readnone
> +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
> +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
> +
> +; FUNC-LABEL: @test_copysign_f64:
> +; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
> +; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
> +; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
> +; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
> +; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
> +; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
> +; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
> +; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
> +; SI: S_ENDPGM
> +define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
> +  %result = call double @llvm.copysign.f64(double %mag, double %sign)
> +  store double %result, double addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @test_copysign_v2f64:
> +; SI: S_ENDPGM
> +define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
> +  %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
> +  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
> +  ret void
> +}
> +
> +; FUNC-LABEL: @test_copysign_v4f64:
> +; SI: S_ENDPGM
> +define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
> +  %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
> +  store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
> +  ret void
> +}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits