[PATCH] R600: Add support for ISD::FROUND

Mon Nov 25 10:47:54 PST 2013

Great! Thank you. :)

On Mon, Nov 25, 2013 at 10:41 AM, Tom Stellard <tom at stellard.net> wrote:

> On Sun, Nov 24, 2013 at 09:32:59PM -0800, Bill Wendling wrote:
> > Hi Tom,
> >
> > All candidates should be in the trunk first, unless they fix a bug
> that’s solely in the branch. :)
> >
>
> Hi Bill,
>
> In Mesa, we mark stable candidates with something like:
>
> NOTE: This is a candidate for the 3.4 branch.
>
> This lets a script automatically cherry-pick fixes to the stable
> branch.  I add the same note to my LLVM commits, so I can use that
> script to verify that all my bugfixes made it into the stable branch.
> I will email you once I've committed it to trunk.
>
> -Tom
>
>
> > -bw
> >
> > On Nov 22, 2013, at 7:04 PM, Tom Stellard <tom at stellard.net> wrote:
> >
> > > From: Tom Stellard <thomas.stellard at amd.com>
> > >
> > > NOTE: This is a candidate for the 3.4 branch.
> > > ---
> > > lib/Target/R600/AMDGPUISelLowering.cpp |  1 +
> > > lib/Target/R600/AMDGPUInstrInfo.td     |  3 +++
> > > lib/Target/R600/R600Instructions.td    | 18 +++++++++++----
> > > test/CodeGen/R600/llvm.round.ll        | 41
> ++++++++++++++++++++++++++++++++++
> > > 4 files changed, 59 insertions(+), 4 deletions(-)
> > > create mode 100644 test/CodeGen/R600/llvm.round.ll
> > >
> > > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp
> b/lib/Target/R600/AMDGPUISelLowering.cpp
> > > index fdabea5..f2a6aab 100644
> > > --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> > > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> > > @@ -58,6 +58,7 @@
> AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> > >   setOperationAction(ISD::FABS,   MVT::f32, Legal);
> > >   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
> > >   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
> > > +  setOperationAction(ISD::FROUND, MVT::f32, Legal);
> > >
> > >   // The hardware supports ROTR, but not ROTL
> > >   setOperationAction(ISD::ROTL, MVT::i32, Expand);
> > > diff --git a/lib/Target/R600/AMDGPUInstrInfo.td
> b/lib/Target/R600/AMDGPUInstrInfo.td
> > > index c0d757e..fccede0 100644
> > > --- a/lib/Target/R600/AMDGPUInstrInfo.td
> > > +++ b/lib/Target/R600/AMDGPUInstrInfo.td
> > > @@ -83,3 +83,6 @@ def AMDGPUregister_store :
> SDNode<"AMDGPUISD::REGISTER_STORE",
> > > def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
> > >                         SDTypeProfile<0, 2, []>,
> > >                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
> > > +
> > > +def AMDGPUround : SDNode<"ISD::FROUND",
> > > +                         SDTypeProfile<1, 1, [SDTCisFP<0>,
> SDTCisSameAs<0,1>]>>;
> > > diff --git a/lib/Target/R600/R600Instructions.td
> b/lib/Target/R600/R600Instructions.td
> > > index abfde50..0346e24 100644
> > > --- a/lib/Target/R600/R600Instructions.td
> > > +++ b/lib/Target/R600/R600Instructions.td
> > > @@ -1110,6 +1110,10 @@ class COS_Common <bits<11> inst> : R600_1OP <
> > >   let Itinerary = TransALU;
> > > }
> > >
> > > +def CLAMP_R600 :  CLAMP <R600_Reg32>;
> > > +def FABS_R600 : FABS<R600_Reg32>;
> > > +def FNEG_R600 : FNEG<R600_Reg32>;
> > > +
> > >
> //===----------------------------------------------------------------------===//
> > > // Helper patterns for complex intrinsics
> > >
> //===----------------------------------------------------------------------===//
> > > @@ -1132,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit,
> InstR600 log_clamped, InstR600 exp_ie
> > >   (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w,
> $src_x))
> > >> ;
> > >
> > > +// FROUND pattern
> > > +class FROUNDPat<Instruction CNDGE> : Pat <
> > > +  (AMDGPUround f32:$x),
> > > +  (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR
> $x))
> > > +>;
> > > +
> > > +
> > >
> //===----------------------------------------------------------------------===//
> > > // R600 / R700 Instructions
> > >
> //===----------------------------------------------------------------------===//
> > > @@ -1173,6 +1184,7 @@ let Predicates = [isR600] in {
> > >   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600,
> LOG_CLAMPED_r600, EXP_IEEE_r600>;
> > >
> > >   def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600
> $src))>;
> > > +  def : FROUNDPat <CNDGE_r600>;
> > >
> > >   def R600_ExportSwz : ExportSwzInst {
> > >     let Word1{20-17} = 0; // BURST_COUNT
> > > @@ -1726,6 +1738,8 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39,
> "LDS_USHORT_READ_RET",
> > >   // SHA-256 Patterns
> > >   def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
> > >
> > > +  def : FROUNDPat <CNDGE_eg>;
> > > +
> > >   def EG_ExportSwz : ExportSwzInst {
> > >     let Word1{19-16} = 0; // BURST_COUNT
> > >     let Word1{20} = 0; // VALID_PIXEL_MODE
> > > @@ -2090,10 +2104,6 @@ def TXD_SHADOW: InstR600 <
> > > } // End isPseudo = 1
> > > } // End usesCustomInserter = 1
> > >
> > > -def CLAMP_R600 :  CLAMP <R600_Reg32>;
> > > -def FABS_R600 : FABS<R600_Reg32>;
> > > -def FNEG_R600 : FNEG<R600_Reg32>;
> > > -
> > >
> //===---------------------------------------------------------------------===//
> > > // Return instruction
> > >
> //===---------------------------------------------------------------------===//
> > > diff --git a/test/CodeGen/R600/llvm.round.ll
> b/test/CodeGen/R600/llvm.round.ll
> > > new file mode 100644
> > > index 0000000..e06d45d
> > > --- /dev/null
> > > +++ b/test/CodeGen/R600/llvm.round.ll
> > > @@ -0,0 +1,41 @@
> > > +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> --check-prefix=R600 --check-prefix=FUNC
> > > +
> > > +; FUNC-LABEL: @f32
> > > +; R600: FRACT
> > > +; R600-DAG: ADD
> > > +; R600-DAG: CEIL
> > > +; R600-DAG: FLOOR
> > > +; R600: CNDGE
> > > +define void @f32(float addrspace(1)* %out, float %in) {
> > > +entry:
> > > +  %0 = call float @llvm.round.f32(float %in)
> > > +  store float %0, float addrspace(1)* %out
> > > +  ret void
> > > +}
> > > +
> > > +; The vector tests are really difficult to verify, since it can be
> hard to
> > > +; predict how the scheduler will order the instructions.  We already
> have
> > > +; a test for the scalar case, so the vector tests just check that the
> > > +; compiler doesn't crash.
> > > +
> > > +; FUNC-LABEL: v2f32
> > > +; R600: CF_END
> > > +define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
> > > +entry:
> > > +  %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
> > > +  store <2 x float> %0, <2 x float> addrspace(1)* %out
> > > +  ret void
> > > +}
> > > +
> > > +; FUNC-LABEL: v4f32
> > > +; R600: CF_END
> > > +define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
> > > +entry:
> > > +  %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
> > > +  store <4 x float> %0, <4 x float> addrspace(1)* %out
> > > +  ret void
> > > +}
> > > +
> > > +declare float @llvm.round.f32(float)
> > > +declare <2 x float> @llvm.round.v2f32(<2 x float>)
> > > +declare <4 x float> @llvm.round.v4f32(<4 x float>)
> > > --
> > > 1.8.1.4
> > >
> > > _______________________________________________
> > > llvm-commits mailing list
> > > llvm-commits at cs.uiuc.edu
> > > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> >
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131125/2f1be503/attachment.html>