[Libclc-dev] [PATCH 2/2] R600: improve float vload/vstore path

Tom Stellard tom at stellard.net
Wed Jul 23 08:27:30 PDT 2014


On Fri, Jul 18, 2014 at 11:04:20AM -0500, Aaron Watry wrote:
> Float values can be loaded/stored by casting through int first,
> which saves us from having to write float load/store assembly paths (which
> can still be done if deemed desirable).
> 
> This cast is the same method we use for unsigned int types.  This lets
> us write one assembly path for all 32-bit value types (and eventually
> i8/i16/i64 paths too).
> 
> This results in a fabs(float16) unit test kernel going from 101 lines to
> 8 lines of LLVM bitcode, and in the decompiled shader size going from
> 296dw and 32gprs to 94dw and 8gprs on evergreen (CEDAR).
> 

LGTM.

> Signed-off-by: Aaron Watry <awatry at gmail.com>
> ---
>  r600/lib/shared/vload.cl  |  4 +++-
>  r600/lib/shared/vstore.cl | 11 +++++++++--
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/r600/lib/shared/vload.cl b/r600/lib/shared/vload.cl
> index 49309c3..79dc976 100644
> --- a/r600/lib/shared/vload.cl
> +++ b/r600/lib/shared/vload.cl
> @@ -35,7 +35,6 @@
>      VLOAD_ADDR_SPACES(ushort) \
>      VLOAD_ADDR_SPACES(long) \
>      VLOAD_ADDR_SPACES(ulong) \
> -    VLOAD_ADDR_SPACES(float) \
>  
>  VLOAD_TYPES()
>  
> @@ -50,6 +49,8 @@ VLOAD_VECTORIZE(int, __private)
>  VLOAD_VECTORIZE(int, __local)
>  VLOAD_VECTORIZE(uint, __private)
>  VLOAD_VECTORIZE(uint, __local)
> +VLOAD_VECTORIZE(float, __private)
> +VLOAD_VECTORIZE(float, __local)
>  
>  //We only define functions for typeN vloadN(), and then just bitcast the result for unsigned types
>  #define _CLC_VLOAD_ASM_DECL(PRIM_TYPE,LLVM_SCALAR_TYPE,ADDR_SPACE,ADDR_SPACE_ID) \
> @@ -80,5 +81,6 @@ _CLC_DECL PRIM_TYPE##16 __clc_vload16_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID
>    _CLC_VLOAD_ASM_DECL(int,i32,__constant,2) \
>    _CLC_VLOAD_ASM_OVERLOAD_ADDR_SPACES(int,int,i32) \
>    _CLC_VLOAD_ASM_OVERLOAD_ADDR_SPACES(uint,int,i32) \
> +  _CLC_VLOAD_ASM_OVERLOAD_ADDR_SPACES(float,int,i32) \
>  
>  _CLC_VLOAD_ASM_OVERLOADS()
> \ No newline at end of file
> diff --git a/r600/lib/shared/vstore.cl b/r600/lib/shared/vstore.cl
> index a150849..51890ed 100644
> --- a/r600/lib/shared/vstore.cl
> +++ b/r600/lib/shared/vstore.cl
> @@ -42,7 +42,6 @@
>      VSTORE_ADDR_SPACES(ushort) \
>      VSTORE_ADDR_SPACES(long) \
>      VSTORE_ADDR_SPACES(ulong) \
> -    VSTORE_ADDR_SPACES(float) \
>  
>  VSTORE_TYPES()
>  
> @@ -55,6 +54,8 @@ VSTORE_VECTORIZE(int, __private)
>  VSTORE_VECTORIZE(int, __local)
>  VSTORE_VECTORIZE(uint, __private)
>  VSTORE_VECTORIZE(uint, __local)
> +VSTORE_VECTORIZE(float, __private)
> +VSTORE_VECTORIZE(float, __local)
>  
>  _CLC_OVERLOAD _CLC_DEF void vstore3(int3 vec, size_t offset, global int *mem) {
>      mem[3*offset] = vec.s0;
> @@ -66,6 +67,11 @@ _CLC_OVERLOAD _CLC_DEF void vstore3(uint3 vec, size_t offset, global uint *mem)
>      mem[3*offset+1] = vec.s1;
>      mem[3*offset+2] = vec.s2;
>  }
> +_CLC_OVERLOAD _CLC_DEF void vstore3(float3 vec, size_t offset, global float *mem) {
> +    mem[3*offset] = vec.s0;
> +    mem[3*offset+1] = vec.s1;
> +    mem[3*offset+2] = vec.s2;
> +}
>  
>  /*Note: R600 doesn't support store <3 x ?>... so
>   * those functions aren't actually overridden here... lowest-common-denominator
> @@ -83,7 +89,7 @@ _CLC_DECL void __clc_vstore16_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (PRIM_TY
>      __clc_vstore##VEC_WIDTH##_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (__builtin_astype(vec, S_PRIM_TYPE##VEC_WIDTH), (ADDR_SPACE S_PRIM_TYPE *)&x[ VEC_WIDTH * offset]); \
>    } \
>  
> -/*Note: R600 back-end doesn't support load <3 x ?>... so
> +/*Note: R600 back-end doesn't support store <3 x ?>... so
>   * those functions aren't actually overridden here... When the back-end supports
>   * that, then clean add here, and remove the vstore3 definitions from above.
>   */
> @@ -100,5 +106,6 @@ _CLC_DECL void __clc_vstore16_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (PRIM_TY
>    _CLC_VSTORE_ASM_DECL(int,i32,__global,1) \
>    _CLC_VSTORE_ASM_OVERLOAD_ADDR_SPACES(int,int,i32) \
>    _CLC_VSTORE_ASM_OVERLOAD_ADDR_SPACES(uint,int,i32) \
> +  _CLC_VSTORE_ASM_OVERLOAD_ADDR_SPACES(float,int,i32) \
>  
>  _CLC_VSTORE_ASM_OVERLOADS()
> \ No newline at end of file
> -- 
> 1.9.1
> 
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev




More information about the Libclc-dev mailing list