[PATCH] R600: Add intrinsics for math helper instructions
Tom Stellard
tom at stellard.net
Tue Jun 17 10:22:06 PDT 2014
On Tue, Jun 17, 2014 at 10:05:19AM -0700, Matt Arsenault wrote:
> On 06/17/2014 07:10 AM, Tom Stellard wrote:
> >On Tue, Jun 17, 2014 at 12:28:14AM +0000, Matt Arsenault wrote:
> >>These will be used in custom lowering and in library implementations of various math functions, so it's useful to expose them as builtins.
> >>
> >>http://reviews.llvm.org/D4168
> >>
> >>Files:
> >> include/llvm/IR/IntrinsicsR600.td
> >> lib/Target/R600/AMDGPUISelLowering.cpp
> >> lib/Target/R600/AMDGPUISelLowering.h
> >> lib/Target/R600/AMDGPUInstrInfo.td
> >> lib/Target/R600/AMDGPUInstructions.td
> >> lib/Target/R600/AMDGPUIntrinsics.td
> >> lib/Target/R600/SIInsertWaits.cpp
> >> lib/Target/R600/SIInstructions.td
> >> lib/Transforms/InstCombine/InstCombineCalls.cpp
> >> test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
> >> test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
> >> test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
> >> test/CodeGen/R600/llvm.AMDGPU.rcp.ll
> >> test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
> >> test/Transforms/InstCombine/r600-intrinsics.ll
> >>Index: include/llvm/IR/IntrinsicsR600.td
> >>===================================================================
> >>--- include/llvm/IR/IntrinsicsR600.td
> >>+++ include/llvm/IR/IntrinsicsR600.td
> >>@@ -33,4 +33,34 @@
> >> "__builtin_r600_read_tgid">;
> >> defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
> >> "__builtin_r600_read_tidig">;
> >>+
> >> } // End TargetPrefix = "r600"
> >>+
> >>+let TargetPrefix = "AMDGPU" in {
> >>+def int_AMDGPU_div_scale :
> >>+ Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
> >>+ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>,
> >>+ GCCBuiltin<"__builtin_r600_div_scale">;
> >I think we should replace the r600 in the builtin name with amdgpu; this will
> >prevent some confusion about what hardware it is supported on.
> >
> >
> >>+
> >>+def int_AMDGPU_div_fmas :
> >>+ Intrinsic<[llvm_anyfloat_ty],
> >>+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
> >>+ [IntrNoMem]>,
> >>+ GCCBuiltin<"__builtin_r600_div_fmas">;
> >>+
> >>+def int_AMDGPU_div_fixup :
> >>+ Intrinsic<[llvm_anyfloat_ty],
> >>+ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>,
> >>+ GCCBuiltin<"__builtin_r600_div_fixup">;
> >>+
> >>+def int_AMDGPU_trig_preop :
> >>+ Intrinsic<[llvm_anyfloat_ty],
> >>+ [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>,
> >>+ GCCBuiltin<"__builtin_r600_trig_preop">;
> >>+
> >>+def int_AMDGPU_rcp :
> >>+ Intrinsic<[llvm_anyfloat_ty],
> >>+ [LLVMMatchType<0>], [IntrNoMem]>,
> >>+ GCCBuiltin<"__builtin_r600_rcp">;
> >>+
> >>+} // End TargetPrefix = "AMDGPU"
> >>Index: lib/Target/R600/AMDGPUISelLowering.cpp
> >>===================================================================
> >>--- lib/Target/R600/AMDGPUISelLowering.cpp
> >>+++ lib/Target/R600/AMDGPUISelLowering.cpp
> >>@@ -896,6 +896,25 @@
> >> case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
> >> return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
> >>+
> >>+ case Intrinsic::AMDGPU_div_scale:
> >>+ return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
> >>+ Op.getOperand(1), Op.getOperand(2));
> >>+
> >>+ case Intrinsic::AMDGPU_div_fmas:
> >>+ return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
> >>+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
> >>+
> >>+ case Intrinsic::AMDGPU_div_fixup:
> >>+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
> >>+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
> >>+
> >>+ case Intrinsic::AMDGPU_trig_preop:
> >>+ return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
> >>+ Op.getOperand(1), Op.getOperand(2));
> >>+
> >>+ case Intrinsic::AMDGPU_rcp:
> >>+ return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
> >> }
> >> }
> >>@@ -1912,6 +1931,13 @@
> >> NODE_NAME_CASE(FMIN)
> >> NODE_NAME_CASE(SMIN)
> >> NODE_NAME_CASE(UMIN)
> >>+ NODE_NAME_CASE(URECIP)
> >>+ NODE_NAME_CASE(DIV_SCALE)
> >>+ NODE_NAME_CASE(DIV_FMAS)
> >>+ NODE_NAME_CASE(DIV_FIXUP)
> >>+ NODE_NAME_CASE(TRIG_PREOP)
> >>+ NODE_NAME_CASE(RCP)
> >>+ NODE_NAME_CASE(DOT4)
> >> NODE_NAME_CASE(BFE_U32)
> >> NODE_NAME_CASE(BFE_I32)
> >> NODE_NAME_CASE(BFI)
> >>@@ -1920,8 +1946,6 @@
> >> NODE_NAME_CASE(MUL_I24)
> >> NODE_NAME_CASE(MAD_U24)
> >> NODE_NAME_CASE(MAD_I24)
> >>- NODE_NAME_CASE(URECIP)
> >>- NODE_NAME_CASE(DOT4)
> >> NODE_NAME_CASE(EXPORT)
> >> NODE_NAME_CASE(CONST_ADDRESS)
> >> NODE_NAME_CASE(REGISTER_LOAD)
> >>Index: lib/Target/R600/AMDGPUISelLowering.h
> >>===================================================================
> >>--- lib/Target/R600/AMDGPUISelLowering.h
> >>+++ lib/Target/R600/AMDGPUISelLowering.h
> >>@@ -179,6 +179,11 @@
> >> SMIN,
> >> UMIN,
> >> URECIP,
> >>+ DIV_SCALE,
> >>+ DIV_FMAS,
> >>+ DIV_FIXUP,
> >>+ TRIG_PREOP,
> >>+ RCP,
> >> DOT4,
> >> BFE_U32, // Extract range of bits with zero extension to 32-bits.
> >> BFE_I32, // Extract range of bits with sign extension to 32-bits.
> >>Index: lib/Target/R600/AMDGPUInstrInfo.td
> >>===================================================================
> >>--- lib/Target/R600/AMDGPUInstrInfo.td
> >>+++ lib/Target/R600/AMDGPUInstrInfo.td
> >>@@ -19,6 +19,14 @@
> >> SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
> >> ]>;
> >>+def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
> >>+ [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
> >>+>;
> >>+
> >>+def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
> >>+ [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
> >>+>;
> >>+
> >> //===----------------------------------------------------------------------===//
> >> // AMDGPU DAG Nodes
> >> //
> >>@@ -29,6 +37,9 @@
> >> // out = a - floor(a)
> >> def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
> >>+// out = 1.0 / a
> >>+def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
> >>+
> >> // out = max(a, b) a and b are floats
> >> def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
> >> [SDNPCommutative, SDNPAssociative]
> >>@@ -78,6 +89,21 @@
> >> // e is rounding error
> >> def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
> >>+// Special case divide preop and flags.
> >>+def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
> >>+
> >>+// Special case divide FMA with scale and flags (src0 = Quotient,
> >>+// src1 = Denominator, src2 = Numerator).
> >>+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
> >>+
> >>+// Single or double precision division fixup.
> >>+// Special case divide fixup and flags(src0 = Quotient, src1 =
> >>+// Denominator, src2 = Numerator).
> >>+def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
> >>+
> >>+// Look Up 2.0 / pi src0 with segment select src1[4:0]
> >>+def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
> >>+
> >> def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
> >> SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
> >> [SDNPHasChain, SDNPMayLoad]>;
> >>Index: lib/Target/R600/AMDGPUInstructions.td
> >>===================================================================
> >>--- lib/Target/R600/AMDGPUInstructions.td
> >>+++ lib/Target/R600/AMDGPUInstructions.td
> >>@@ -519,6 +519,16 @@
> >> >;
> >> }
> >>+class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
> >>+ (fdiv FP_ONE, vt:$src),
> >>+ (RcpInst $src)
> >>+>;
> >>+
> >>+class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
> >>+ (AMDGPUrcp (fsqrt vt:$src)),
> >>+ (RsqInst $src)
> >>+>;
> >>+
> >Do RCP and RSQ have IEEE precision?
> I'm unclear on this. Different sources seem to conflict. One place
> I've found claims it has 1 ulp precision; others say it is
> "approximate" with no mention of IEEE. If it is IEEE precise, there's not
> really any reason to add the intrinsic for it.
>
I think we should add comments defining the precision of the intrinsic
and the SDNode, and if we define it as approximate, we should call it
something else, like rcp_approx or rcp_native.
-Tom
More information about the llvm-commits
mailing list