[Libclc-dev] [PATCH] math: Add fmod implementation
Aaron Watry
awatry at gmail.com
Sun Sep 21 17:24:34 PDT 2014
Works for me. Let me know when that gets committed, and I can either do a v3 or, if you write it, I'll take a look at the patch.
--Aaron
On Sep 19, 2014 5:57 PM, Jan Vesely <jan.vesely at rutgers.edu> wrote:
>
> On Tue, 2014-09-16 at 17:36 -0500, Aaron Watry wrote:
> > Ping.
>
> I've just sent a clang patch that implements __builtin_fmod using frem.
> Can you hold off on this, pending the outcome of that patch?
> The __builtin_fmod version of the libclc patch is much smaller.
>
> thanks,
> jan
>
> >
> > On Wed, Sep 10, 2014 at 6:37 PM, Aaron Watry <awatry at gmail.com> wrote:
> > > Passes piglit tests on evergreen (sent to piglit list).
> > >
> > > v2: Use llvm frem instruction for fmod
> > >
> > > Signed-off-by: Aaron Watry <awatry at gmail.com>
> > > ---
> > > generic/include/clc/clc.h | 1 +
> > > generic/include/clc/math/fmod.h | 7 +++++
> > > generic/lib/SOURCES | 2 ++
> > > generic/lib/math/fmod.cl | 26 ++++++++++++++++++
> > > generic/lib/math/fmod_impl.ll | 60 +++++++++++++++++++++++++++++++++++++++++
> > > 5 files changed, 96 insertions(+)
> > > create mode 100644 generic/include/clc/math/fmod.h
> > > create mode 100644 generic/lib/math/fmod.cl
> > > create mode 100644 generic/lib/math/fmod_impl.ll
> > >
> > > diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> > > index b8c1cb9..94557a1 100644
> > > --- a/generic/include/clc/clc.h
> > > +++ b/generic/include/clc/clc.h
> > > @@ -47,6 +47,7 @@
> > > #include <clc/math/fma.h>
> > > #include <clc/math/fmax.h>
> > > #include <clc/math/fmin.h>
> > > +#include <clc/math/fmod.h>
> > > #include <clc/math/hypot.h>
> > > #include <clc/math/log.h>
> > > #include <clc/math/log2.h>
> > > diff --git a/generic/include/clc/math/fmod.h b/generic/include/clc/math/fmod.h
> > > new file mode 100644
> > > index 0000000..737679f
> > > --- /dev/null
> > > +++ b/generic/include/clc/math/fmod.h
> > > @@ -0,0 +1,7 @@
> > > +#define __CLC_BODY <clc/math/binary_decl.inc>
> > > +#define __CLC_FUNCTION fmod
> > > +
> > > +#include <clc/math/gentype.inc>
> > > +
> > > +#undef __CLC_BODY
> > > +#undef __CLC_FUNCTION
> > > diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> > > index e4ba1d1..141f3f8 100644
> > > --- a/generic/lib/SOURCES
> > > +++ b/generic/lib/SOURCES
> > > @@ -39,6 +39,8 @@ math/exp.cl
> > > math/exp10.cl
> > > math/fmax.cl
> > > math/fmin.cl
> > > +math/fmod.cl
> > > +math/fmod_impl.ll
> > > math/hypot.cl
> > > math/mad.cl
> > > math/mix.cl
> > > diff --git a/generic/lib/math/fmod.cl b/generic/lib/math/fmod.cl
> > > new file mode 100644
> > > index 0000000..c15b3dc
> > > --- /dev/null
> > > +++ b/generic/lib/math/fmod.cl
> > > @@ -0,0 +1,26 @@
> > > +#include <clc/clc.h>
> > > +#include "../clcmacro.h"
> > > +
> > > +#define _FMOD_DEF(TYPE, WIDTH, FUNCTION, CLC_FUNCTION) \
> > > +_CLC_DECL TYPE##WIDTH CLC_FUNCTION##WIDTH(TYPE##WIDTH, TYPE##WIDTH); \
> > > +_CLC_OVERLOAD _CLC_DEF TYPE##WIDTH FUNCTION(TYPE##WIDTH x, TYPE##WIDTH y) { \
> > > + return CLC_FUNCTION##WIDTH(x, y); \
> > > +}
> > > +
> > > +#define _FMOD_DEF_VECTORIZE(TYPE, FUNCTION, CLC_FUNCTION)\
> > > +_FMOD_DEF(TYPE,,FUNCTION,CLC_FUNCTION) \
> > > +_FMOD_DEF(TYPE,2,FUNCTION,CLC_FUNCTION) \
> > > +_FMOD_DEF(TYPE,3,FUNCTION,CLC_FUNCTION) \
> > > +_FMOD_DEF(TYPE,4,FUNCTION,CLC_FUNCTION) \
> > > +_FMOD_DEF(TYPE,8,FUNCTION,CLC_FUNCTION) \
> > > +_FMOD_DEF(TYPE,16,FUNCTION,CLC_FUNCTION) \
> > > +
> > > +_FMOD_DEF_VECTORIZE(float, fmod, __clc_fmodf)
> > > +
> > > +#ifdef cl_khr_fp64
> > > +
> > > +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> > > +
> > > +_FMOD_DEF_VECTORIZE(double, fmod, __clc_fmod)
> > > +
> > > +#endif
> > > diff --git a/generic/lib/math/fmod_impl.ll b/generic/lib/math/fmod_impl.ll
> > > new file mode 100644
> > > index 0000000..66c4982
> > > --- /dev/null
> > > +++ b/generic/lib/math/fmod_impl.ll
> > > @@ -0,0 +1,60 @@
> > > +
> > > +define float @__clc_fmodf(float %x, float %y) nounwind readnone alwaysinline {
> > > + %result = frem float %x, %y
> > > + ret float %result
> > > +}
> > > +
> > > +define <2 x float> @__clc_fmodf2(<2 x float> %x, <2 x float> %y) nounwind readnone alwaysinline {
> > > + %result = frem <2 x float> %x, %y
> > > + ret <2 x float> %result
> > > +}
> > > +
> > > +define <3 x float> @__clc_fmodf3(<3 x float> %x, <3 x float> %y) nounwind readnone alwaysinline {
> > > + %result = frem <3 x float> %x, %y
> > > + ret <3 x float> %result
> > > +}
> > > +
> > > +define <4 x float> @__clc_fmodf4(<4 x float> %x, <4 x float> %y) nounwind readnone alwaysinline {
> > > + %result = frem <4 x float> %x, %y
> > > + ret <4 x float> %result
> > > +}
> > > +
> > > +define <8 x float> @__clc_fmodf8(<8 x float> %x, <8 x float> %y) nounwind readnone alwaysinline {
> > > + %result = frem <8 x float> %x, %y
> > > + ret <8 x float> %result
> > > +}
> > > +
> > > +define <16 x float> @__clc_fmodf16(<16 x float> %x, <16 x float> %y) nounwind readnone alwaysinline {
> > > + %result = frem <16 x float> %x, %y
> > > + ret <16 x float> %result
> > > +}
> > > +
> > > +define double @__clc_fmod(double %x, double %y) nounwind readnone alwaysinline {
> > > + %result = frem double %x, %y
> > > + ret double %result
> > > +}
> > > +
> > > +define <2 x double> @__clc_fmod2(<2 x double> %x, <2 x double> %y) nounwind readnone alwaysinline {
> > > + %result = frem <2 x double> %x, %y
> > > + ret <2 x double> %result
> > > +}
> > > +
> > > +define <3 x double> @__clc_fmod3(<3 x double> %x, <3 x double> %y) nounwind readnone alwaysinline {
> > > + %result = frem <3 x double> %x, %y
> > > + ret <3 x double> %result
> > > +}
> > > +
> > > +define <4 x double> @__clc_fmod4(<4 x double> %x, <4 x double> %y) nounwind readnone alwaysinline {
> > > + %result = frem <4 x double> %x, %y
> > > + ret <4 x double> %result
> > > +}
> > > +
> > > +define <8 x double> @__clc_fmod8(<8 x double> %x, <8 x double> %y) nounwind readnone alwaysinline {
> > > + %result = frem <8 x double> %x, %y
> > > + ret <8 x double> %result
> > > +}
> > > +
> > > +define <16 x double> @__clc_fmod16(<16 x double> %x, <16 x double> %y) nounwind readnone alwaysinline {
> > > + %result = frem <16 x double> %x, %y
> > > + ret <16 x double> %result
> > > +}
> > > --
> > > 1.9.1
> > >
> >
> > _______________________________________________
> > Libclc-dev mailing list
> > Libclc-dev at pcc.me.uk
> > http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
>
> --
> Jan Vesely <jan.vesely at rutgers.edu>
More information about the Libclc-dev
mailing list