[PATCH] R600: Add intrinsics for math helper instructions

Matt Arsenault Matthew.Arsenault at amd.com
Tue Jun 17 10:05:19 PDT 2014


On 06/17/2014 07:10 AM, Tom Stellard wrote:
> On Tue, Jun 17, 2014 at 12:28:14AM +0000, Matt Arsenault wrote:
>> These will be used in custom lowering and in library implementations of various math functions, so it's useful to expose them as builtins.
>>
>> http://reviews.llvm.org/D4168
>>
>> Files:
>>    include/llvm/IR/IntrinsicsR600.td
>>    lib/Target/R600/AMDGPUISelLowering.cpp
>>    lib/Target/R600/AMDGPUISelLowering.h
>>    lib/Target/R600/AMDGPUInstrInfo.td
>>    lib/Target/R600/AMDGPUInstructions.td
>>    lib/Target/R600/AMDGPUIntrinsics.td
>>    lib/Target/R600/SIInsertWaits.cpp
>>    lib/Target/R600/SIInstructions.td
>>    lib/Transforms/InstCombine/InstCombineCalls.cpp
>>    test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
>>    test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
>>    test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
>>    test/CodeGen/R600/llvm.AMDGPU.rcp.ll
>>    test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
>>    test/Transforms/InstCombine/r600-intrinsics.ll
>> Index: include/llvm/IR/IntrinsicsR600.td
>> ===================================================================
>> --- include/llvm/IR/IntrinsicsR600.td
>> +++ include/llvm/IR/IntrinsicsR600.td
>> @@ -33,4 +33,34 @@
>>                                          "__builtin_r600_read_tgid">;
>>   defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
>>                                          "__builtin_r600_read_tidig">;
>> +
>>   } // End TargetPrefix = "r600"
>> +
>> +let TargetPrefix = "AMDGPU" in {
>> +def int_AMDGPU_div_scale :
>> +  Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
>> +            [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>,
>> +            GCCBuiltin<"__builtin_r600_div_scale">;
> I think we should replace the r600 in the builtin name with amdgpu; this will
> prevent some confusion about what hardware it is supported on.
>
>
>> +
>> +def int_AMDGPU_div_fmas :
>> +  Intrinsic<[llvm_anyfloat_ty],
>> +            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
>> +            [IntrNoMem]>,
>> +            GCCBuiltin<"__builtin_r600_div_fmas">;
>> +
>> +def int_AMDGPU_div_fixup :
>> +  Intrinsic<[llvm_anyfloat_ty],
>> +            [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>,
>> +            GCCBuiltin<"__builtin_r600_div_fixup">;
>> +
>> +def int_AMDGPU_trig_preop :
>> +  Intrinsic<[llvm_anyfloat_ty],
>> +            [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>,
>> +            GCCBuiltin<"__builtin_r600_trig_preop">;
>> +
>> +def int_AMDGPU_rcp :
>> +  Intrinsic<[llvm_anyfloat_ty],
>> +            [LLVMMatchType<0>], [IntrNoMem]>,
>> +            GCCBuiltin<"__builtin_r600_rcp">;
>> +
>> +} // End TargetPrefix = "AMDGPU"
>> Index: lib/Target/R600/AMDGPUISelLowering.cpp
>> ===================================================================
>> --- lib/Target/R600/AMDGPUISelLowering.cpp
>> +++ lib/Target/R600/AMDGPUISelLowering.cpp
>> @@ -896,6 +896,25 @@
>>   
>>       case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
>>         return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
>> +
>> +    case Intrinsic::AMDGPU_div_scale:
>> +      return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
>> +                         Op.getOperand(1), Op.getOperand(2));
>> +
>> +    case Intrinsic::AMDGPU_div_fmas:
>> +      return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
>> +                         Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
>> +
>> +    case Intrinsic::AMDGPU_div_fixup:
>> +      return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
>> +                         Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
>> +
>> +    case Intrinsic::AMDGPU_trig_preop:
>> +      return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
>> +                         Op.getOperand(1), Op.getOperand(2));
>> +
>> +    case Intrinsic::AMDGPU_rcp:
>> +      return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
>>     }
>>   }
>>   
>> @@ -1912,6 +1931,13 @@
>>     NODE_NAME_CASE(FMIN)
>>     NODE_NAME_CASE(SMIN)
>>     NODE_NAME_CASE(UMIN)
>> +  NODE_NAME_CASE(URECIP)
>> +  NODE_NAME_CASE(DIV_SCALE)
>> +  NODE_NAME_CASE(DIV_FMAS)
>> +  NODE_NAME_CASE(DIV_FIXUP)
>> +  NODE_NAME_CASE(TRIG_PREOP)
>> +  NODE_NAME_CASE(RCP)
>> +  NODE_NAME_CASE(DOT4)
>>     NODE_NAME_CASE(BFE_U32)
>>     NODE_NAME_CASE(BFE_I32)
>>     NODE_NAME_CASE(BFI)
>> @@ -1920,8 +1946,6 @@
>>     NODE_NAME_CASE(MUL_I24)
>>     NODE_NAME_CASE(MAD_U24)
>>     NODE_NAME_CASE(MAD_I24)
>> -  NODE_NAME_CASE(URECIP)
>> -  NODE_NAME_CASE(DOT4)
>>     NODE_NAME_CASE(EXPORT)
>>     NODE_NAME_CASE(CONST_ADDRESS)
>>     NODE_NAME_CASE(REGISTER_LOAD)
>> Index: lib/Target/R600/AMDGPUISelLowering.h
>> ===================================================================
>> --- lib/Target/R600/AMDGPUISelLowering.h
>> +++ lib/Target/R600/AMDGPUISelLowering.h
>> @@ -179,6 +179,11 @@
>>     SMIN,
>>     UMIN,
>>     URECIP,
>> +  DIV_SCALE,
>> +  DIV_FMAS,
>> +  DIV_FIXUP,
>> +  TRIG_PREOP,
>> +  RCP,
>>     DOT4,
>>     BFE_U32, // Extract range of bits with zero extension to 32-bits.
>>     BFE_I32, // Extract range of bits with sign extension to 32-bits.
>> Index: lib/Target/R600/AMDGPUInstrInfo.td
>> ===================================================================
>> --- lib/Target/R600/AMDGPUInstrInfo.td
>> +++ lib/Target/R600/AMDGPUInstrInfo.td
>> @@ -19,6 +19,14 @@
>>     SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
>>   ]>;
>>   
>> +def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
>> +  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>> +>;
>> +
>> +def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
>> +  [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>> +>;
>> +
>>   //===----------------------------------------------------------------------===//
>>   // AMDGPU DAG Nodes
>>   //
>> @@ -29,6 +37,9 @@
>>   // out = a - floor(a)
>>   def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
>>   
>> +// out = 1.0 / a
>> +def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
>> +
>>   // out = max(a, b) a and b are floats
>>   def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
>>     [SDNPCommutative, SDNPAssociative]
>> @@ -78,6 +89,21 @@
>>   // e is rounding error
>>   def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
>>   
>> +// Special case divide preop and flags.
>> +def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
>> +
>> +//  Special case divide FMA with scale and flags (src0 = Quotient,
>> +//  src1 = Denominator, src2 = Numerator).
>> +def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
>> +
>> +// Single or double precision division fixup.
>> +// Special case divide fixup and flags(src0 = Quotient, src1 =
>> +// Denominator, src2 = Numerator).
>> +def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
>> +
>> +// Look Up 2.0 / pi src0 with segment select src1[4:0]
>> +def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
>> +
>>   def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
>>                             SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
>>                             [SDNPHasChain, SDNPMayLoad]>;
>> Index: lib/Target/R600/AMDGPUInstructions.td
>> ===================================================================
>> --- lib/Target/R600/AMDGPUInstructions.td
>> +++ lib/Target/R600/AMDGPUInstructions.td
>> @@ -519,6 +519,16 @@
>>     >;
>>   }
>>   
>> +class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
>> +  (fdiv FP_ONE, vt:$src),
>> +  (RcpInst $src)
>> +>;
>> +
>> +class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
>> +  (AMDGPUrcp (fsqrt vt:$src)),
>> +  (RsqInst $src)
>> +>;
>> +
> Do RCP and RSQ have IEEE precision?
I'm unclear on this. Different sources seem to conflict. One place I've
found claims it has 1 ulp precision; others say it is "approximate" with
no mention of IEEE. If it does have IEEE precision, there's not really any
reason to add the intrinsic for it.




More information about the llvm-commits mailing list