<div dir="ltr">Great! Thank you. :)</div><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Nov 25, 2013 at 10:41 AM, Tom Stellard <span dir="ltr">&lt;<a href="mailto:tom@stellard.net" target="_blank">tom@stellard.net</a>&gt;</span> wrote:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="im">On Sun, Nov 24, 2013 at 09:32:59PM -0800, Bill Wendling wrote:<br>

> Hi Tom,<br>

><br>

> All candidates should be in the trunk first, unless they fix a bug that’s solely in the branch. :)<br>

><br>

<br>

</div>Hi Bill,<br>

<br>

In Mesa, we mark stable candidates with something like:<br>

<div class="im"><br>

NOTE: This is a candidate for the 3.4 branch.<br>

<br>

</div>So that we can have a script auto cherry-pick fixes to the stable<br>

branch.  I add it to my LLVM commits, so I can use the same script to<br>

verify that all my bugfixes made it into the stable branch.  I will<br>

email you once I've committed it to trunk.<br>

<span class="HOEnZb"><font color="#888888"><br>

-Tom<br>

</font></span><div class="HOEnZb"><div class="h5"><br>

<br>

> -bw<br>

><br>

> On Nov 22, 2013, at 7:04 PM, Tom Stellard &lt;<a href="mailto:tom@stellard.net">tom@stellard.net</a>&gt; wrote:<br>

><br>

> > From: Tom Stellard &lt;<a href="mailto:thomas.stellard@amd.com">thomas.stellard@amd.com</a>&gt;<br>

> ><br>

> > NOTE: This is a candidate for the 3.4 branch.<br>

> > ---<br>

> > lib/Target/R600/AMDGPUISelLowering.cpp |  1 +<br>

> > lib/Target/R600/AMDGPUInstrInfo.td     |  3 +++<br>

> > lib/Target/R600/R600Instructions.td    | 18 +++++++++++----<br>

> > test/CodeGen/R600/llvm.round.ll        | 41 ++++++++++++++++++++++++++++++++++<br>

> > 4 files changed, 59 insertions(+), 4 deletions(-)<br>

> > create mode 100644 test/CodeGen/R600/llvm.round.ll<br>

> ><br>

> > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp<br>

> > index fdabea5..f2a6aab 100644<br>

> > --- a/lib/Target/R600/AMDGPUISelLowering.cpp<br>

> > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp<br>

> > @@ -58,6 +58,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :<br>

> >   setOperationAction(ISD::FABS,   MVT::f32, Legal);<br>

> >   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);<br>

> >   setOperationAction(ISD::FRINT,  MVT::f32, Legal);<br>

> > +  setOperationAction(ISD::FROUND, MVT::f32, Legal);<br>

> ><br>

> >   // The hardware supports ROTR, but not ROTL<br>

> >   setOperationAction(ISD::ROTL, MVT::i32, Expand);<br>

> > diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td<br>

> > index c0d757e..fccede0 100644<br>

> > --- a/lib/Target/R600/AMDGPUInstrInfo.td<br>

> > +++ b/lib/Target/R600/AMDGPUInstrInfo.td<br>

> > @@ -83,3 +83,6 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",<br>

> > def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",<br>

> >                         SDTypeProfile<0, 2, []>,<br>

> >                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;<br>

> > +<br>

> > +def AMDGPUround : SDNode<"ISD::FROUND",<br>

> > +                         SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;<br>

> > diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td<br>

> > index abfde50..0346e24 100644<br>

> > --- a/lib/Target/R600/R600Instructions.td<br>

> > +++ b/lib/Target/R600/R600Instructions.td<br>

> > @@ -1110,6 +1110,10 @@ class COS_Common &lt;bits&lt;11&gt; inst&gt; : R600_1OP &lt;<br>

> >   let Itinerary = TransALU;<br>

> > }<br>

> ><br>

> > +def CLAMP_R600 :  CLAMP &lt;R600_Reg32&gt;;<br>

> > +def FABS_R600 : FABS&lt;R600_Reg32&gt;;<br>

> > +def FNEG_R600 : FNEG&lt;R600_Reg32&gt;;<br>

> > +<br>

> > //===----------------------------------------------------------------------===//<br>

> > // Helper patterns for complex intrinsics<br>

> > //===----------------------------------------------------------------------===//<br>

> > @@ -1132,6 +1136,13 @@ class TGSI_LIT_Z_Common &lt;InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie<br>

> >   (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))<br>

> > &gt;;<br>

> ><br>

> > +// FROUND pattern<br>

> > +class FROUNDPat&lt;Instruction CNDGE&gt; : Pat &lt;<br>

> > +  (AMDGPUround f32:$x),<br>

> > +  (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))<br>

> > +>;<br>

> > +<br>

> > +<br>

> > //===----------------------------------------------------------------------===//<br>

> > // R600 / R700 Instructions<br>

> > //===----------------------------------------------------------------------===//<br>

> > @@ -1173,6 +1184,7 @@ let Predicates = [isR600] in {<br>

> >   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common&lt;MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600&gt;;<br>

> ><br>

> >   def : Pat&lt;(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))&gt;;<br>

> > +  def : FROUNDPat &lt;CNDGE_r600&gt;;<br>

> ><br>

> >   def R600_ExportSwz : ExportSwzInst {<br>

> >     let Word1{20-17} = 0; // BURST_COUNT<br>

> > @@ -1726,6 +1738,8 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",<br>

> >   // SHA-256 Patterns<br>

> >   def : SHA256MaPattern &lt;BFI_INT_eg, XOR_INT&gt;;<br>

> ><br>

> > +  def : FROUNDPat &lt;CNDGE_eg&gt;;<br>

> > +<br>

> >   def EG_ExportSwz : ExportSwzInst {<br>

> >     let Word1{19-16} = 0; // BURST_COUNT<br>

> >     let Word1{20} = 0; // VALID_PIXEL_MODE<br>

> > @@ -2090,10 +2104,6 @@ def TXD_SHADOW: InstR600 <<br>

> > } // End isPseudo = 1<br>

> > } // End usesCustomInserter = 1<br>

> ><br>

> > -def CLAMP_R600 :  CLAMP &lt;R600_Reg32&gt;;<br>

> > -def FABS_R600 : FABS&lt;R600_Reg32&gt;;<br>

> > -def FNEG_R600 : FNEG&lt;R600_Reg32&gt;;<br>

> > -<br>

> > //===---------------------------------------------------------------------===//<br>

> > // Return instruction<br>

> > //===---------------------------------------------------------------------===//<br>

> > diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll<br>

> > new file mode 100644<br>

> > index 0000000..e06d45d<br>

> > --- /dev/null<br>

> > +++ b/test/CodeGen/R600/llvm.round.ll<br>

> > @@ -0,0 +1,41 @@<br>

> > +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC<br>

> > +<br>

> > +; FUNC-LABEL: @f32<br>

> > +; R600: FRACT<br>

> > +; R600-DAG: ADD<br>

> > +; R600-DAG: CEIL<br>

> > +; R600-DAG: FLOOR<br>

> > +; R600: CNDGE<br>

> > +define void @f32(float addrspace(1)* %out, float %in) {<br>

> > +entry:<br>

> > +  %0 = call float @llvm.round.f32(float %in)<br>

> > +  store float %0, float addrspace(1)* %out<br>

> > +  ret void<br>

> > +}<br>

> > +<br>

> > +; The vector tests are really difficult to verify, since it can be hard to<br>

> > +; predict how the scheduler will order the instructions.  We already have<br>

> > +; a test for the scalar case, so the vector tests just check that the<br>

> > +; compiler doesn't crash.<br>

> > +<br>

> > +; FUNC-LABEL: v2f32<br>

> > +; R600: CF_END<br>

> > +define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {<br>

> > +entry:<br>

> > +  %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)<br>

> > +  store <2 x float> %0, <2 x float> addrspace(1)* %out<br>

> > +  ret void<br>

> > +}<br>

> > +<br>

> > +; FUNC-LABEL: v4f32<br>

> > +; R600: CF_END<br>

> > +define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {<br>

> > +entry:<br>

> > +  %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)<br>

> > +  store <4 x float> %0, <4 x float> addrspace(1)* %out<br>

> > +  ret void<br>

> > +}<br>

> > +<br>

> > +declare float @llvm.round.f32(float)<br>

> > +declare <2 x float> @llvm.round.v2f32(<2 x float>)<br>

> > +declare <4 x float> @llvm.round.v4f32(<4 x float>)<br>

> > --<br>

> > 1.8.1.4<br>

> ><br>

> > _______________________________________________<br>

> > llvm-commits mailing list<br>

> > <a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>

> > <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>

><br>

</div></div></blockquote></div><br></div>