[PATCH] R600/SI: implement range reduction for sin/cos

Sat Jul 19 11:24:15 PDT 2014

Yes, please do.

On 19. Juli 2014 20:04:07 MESZ, Matt Arsenault <arsenm2 at gmail.com> wrote:
>
>On Jul 19, 2014, at 6:16 AM, Grigori Goronzy <greg at chown.ath.cx> wrote:
>
>> These instructions can only take a limited input range, and return
>> the constant value 1 when the input is out of range. We should do
>> range reduction to be able to process arbitrary values. Use a FRACT
>> instruction after normalization to achieve this. Also add a test for
>> constant folding of the lowered code when unsafe-fp-math is enabled.
>> 
>> v2: use DAG lowering instead of intrinsic, adapt test
>> v3: calculate constant, fold pattern into instruction definition
>> v4: misc style fixes, add sin-fold testcase, cosmetics
>> ---
>> lib/Target/R600/AMDGPUInstrInfo.td |  3 +++
>> lib/Target/R600/SIISelLowering.cpp | 23 +++++++++++++++++++++++
>> lib/Target/R600/SIISelLowering.h   |  1 +
>> lib/Target/R600/SIInstructions.td  | 18 ++++++------------
>> test/CodeGen/R600/llvm.sin.ll      | 22 ++++++++++++++++++++--
>> 5 files changed, 53 insertions(+), 14 deletions(-)
>> 
>> diff --git a/lib/Target/R600/AMDGPUInstrInfo.td
>b/lib/Target/R600/AMDGPUInstrInfo.td
>> index 934d59d..820f1a8 100644
>> --- a/lib/Target/R600/AMDGPUInstrInfo.td
>> +++ b/lib/Target/R600/AMDGPUInstrInfo.td
>> @@ -34,6 +34,9 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
>> // This argument to this node is a dword address.
>> def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
>> 
>> +def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
>> +def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
>> +
>> // out = a - floor(a)
>> def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
>> 
>> diff --git a/lib/Target/R600/SIISelLowering.cpp
>b/lib/Target/R600/SIISelLowering.cpp
>> index b3429b9..86997c8 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -80,6 +80,9 @@ SITargetLowering::SITargetLowering(TargetMachine
>&TM) :
>>   setOperationAction(ISD::SUBC, MVT::i32, Legal);
>>   setOperationAction(ISD::SUBE, MVT::i32, Legal);
>> 
>> +  setOperationAction(ISD::FSIN, MVT::f32, Custom);
>> +  setOperationAction(ISD::FCOS, MVT::f32, Custom);
>> +
>>   // We need to custom lower vector stores from local memory
>>   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
>>   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
>> @@ -637,6 +640,9 @@ SDValue SITargetLowering::LowerOperation(SDValue
>Op, SelectionDAG &DAG) const {
>>     }
>>   }
>> 
>> +  case ISD::FSIN:
>> +  case ISD::FCOS:
>> +    return LowerTrig(Op, DAG);
>>   case ISD::SELECT: return LowerSELECT(Op, DAG);
>>   case ISD::FDIV: return LowerFDIV(Op, DAG);
>>   case ISD::STORE: return LowerSTORE(Op, DAG);
>> @@ -1116,6 +1122,23 @@ SDValue SITargetLowering::LowerSTORE(SDValue
>Op, SelectionDAG &DAG) const {
>>   return Chain;
>> }
>> 
>> +SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG)
>const {
>> +  EVT VT = Op.getValueType();
>> +  SDValue Arg = Op.getOperand(0);
>> +  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
>> +        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
>> +          DAG.getConstantFP(0.5 / M_PI, VT)));
>> +
>> +  switch (Op.getOpcode()) {
>> +  case ISD::FCOS:
>> +    return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
>> +  case ISD::FSIN:
>> +    return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
>> +  default:
>> +    llvm_unreachable("Wrong trig opcode");
>> +  }
>> +}
>> +
>>
>//===----------------------------------------------------------------------===//
>> // Custom DAG optimizations
>>
>//===----------------------------------------------------------------------===//
>> diff --git a/lib/Target/R600/SIISelLowering.h
>b/lib/Target/R600/SIISelLowering.h
>> index 9e9a0b0..b3343ee 100644
>> --- a/lib/Target/R600/SIISelLowering.h
>> +++ b/lib/Target/R600/SIISelLowering.h
>> @@ -32,6 +32,7 @@ class SITargetLowering : public
>AMDGPUTargetLowering {
>>   SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
>>   SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
>>   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
>> +  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
>>   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
>> 
>>   bool foldImm(SDValue &Operand, int32_t &Immediate,
>> diff --git a/lib/Target/R600/SIInstructions.td
>b/lib/Target/R600/SIInstructions.td
>> index a4920db..bd5be32 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -1167,8 +1167,12 @@ defm V_SQRT_F32 : VOP1_32 <0x00000033,
>"V_SQRT_F32",
>> defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
>>   [(set f64:$dst, (fsqrt f64:$src0))]
>>> ;
>> -defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
>> -defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
>> +defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32",
>> +  [(set f32:$dst, (AMDGPUsin f32:$src0))]
>> +>;
>> +defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32",
>> +  [(set f32:$dst, (AMDGPUcos f32:$src0))]
>> +>;
>> defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
>> defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
>> defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
>> @@ -2343,16 +2347,6 @@ def : Pat<
>>> ;
>> 
>> def : Pat <
>> -  (fcos f32:$src0),
>> -  (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32
>CONST.TWO_PI_INV)))
>> ->;
>> -
>> -def : Pat <
>> -  (fsin f32:$src0),
>> -  (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32
>CONST.TWO_PI_INV)))
>> ->;
>> -
>> -def : Pat <
>>   (int_AMDGPU_cube v4f32:$src),
>>   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32
>(IMPLICIT_DEF)),
>>     (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
>> diff --git a/test/CodeGen/R600/llvm.sin.ll
>b/test/CodeGen/R600/llvm.sin.ll
>> index 41c363c..53006ba 100644
>> --- a/test/CodeGen/R600/llvm.sin.ll
>> +++ b/test/CodeGen/R600/llvm.sin.ll
>> @@ -1,5 +1,6 @@
>> -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>-check-prefix=EG -check-prefix=FUNC
>> -;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI
>-check-prefix=FUNC
>> +;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck
>-check-prefix=EG -check-prefix=FUNC %s
>> +;RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI
>-check-prefix=SI-SAFE -check-prefix=FUNC %s
>> +;RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s |
>FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC
>%s
>> 
>> ;FUNC-LABEL: test
>> ;EG: MULADD_IEEE *
>> @@ -8,6 +9,7 @@
>> ;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> ;EG-NOT: SIN
>> ;SI: V_MUL_F32
>> +;SI: V_FRACT_F32
>> ;SI: V_SIN_F32
>> ;SI-NOT: V_SIN_F32
>> 
>> @@ -17,6 +19,22 @@ define void @test(float addrspace(1)* %out, float
>%x) #1 {
>>    ret void
>> }
>> 
>> +;FUNC-LABEL: testf
>> +;SI-UNSAFE: 4.774
>> +;SI-UNSAFE: V_MUL_F32
>> +;SI-SAFE: V_MUL_F32
>> +;SI-SAFE: V_MUL_F32
>> +;SI: V_FRACT_F32
>> +;SI: V_SIN_F32
>> +;SI-NOT: V_SIN_F32
>> +
>> +define void @testf(float addrspace(1)* %out, float %x) #1 {
>> +   %y = fmul float 3.0, %x
>> +   %sin = call float @llvm.sin.f32(float %y)
>> +   store float %sin, float addrspace(1)* %out
>> +   ret void
>> +}
>> +
>> ;FUNC-LABEL: testv
>> ;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> ;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>> -- 
>> 1.8.3.2
>
>Do you need me to commit this for you?

-- 
Sent from my phone.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140719/917b5e1b/attachment.html>