[Libclc-dev] [PATCH 5/5] amdgcn-amdhsa: Convert get_{global, local}_size to clc for all llvm versions

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Fri Nov 2 20:22:01 PDT 2018


The first 4 patches in this series are Reviewed-By: Aaron Watry <awatry at gmail.com>

This patch (5/5) is Acked-By: Aaron Watry <awatry at gmail.com>

I don't see anything wrong with it, but I'm not too familiar with the
amdhsa code.

--Aaron

On Mon, Oct 29, 2018 at 1:43 AM Jan Vesely via Libclc-dev
<libclc-dev at lists.llvm.org> wrote:
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>  amdgcn-amdhsa/lib/OVERRIDES                   |  6 ---
>  amdgcn-amdhsa/lib/OVERRIDES_3.9               |  3 --
>  amdgcn-amdhsa/lib/OVERRIDES_4.0               |  2 -
>  amdgcn-amdhsa/lib/OVERRIDES_5.0               |  2 -
>  amdgcn-amdhsa/lib/SOURCES_3.9                 |  2 -
>  amdgcn-amdhsa/lib/SOURCES_4.0                 |  2 -
>  amdgcn-amdhsa/lib/SOURCES_5.0                 |  2 -
>  .../lib/workitem/get_global_size.39.ll        | 36 -----------------
>  .../lib/workitem/get_global_size.50.ll        | 39 -------------------
>  amdgcn-amdhsa/lib/workitem/get_global_size.cl | 10 ++++-
>  .../lib/workitem/get_local_size.39.ll         | 35 -----------------
>  .../lib/workitem/get_local_size.50.ll         | 38 ------------------
>  amdgcn-amdhsa/lib/workitem/get_local_size.cl  | 10 ++++-
>  13 files changed, 16 insertions(+), 171 deletions(-)
>  delete mode 100644 amdgcn-amdhsa/lib/OVERRIDES
>  delete mode 100644 amdgcn-amdhsa/lib/OVERRIDES_3.9
>  delete mode 100644 amdgcn-amdhsa/lib/OVERRIDES_4.0
>  delete mode 100644 amdgcn-amdhsa/lib/OVERRIDES_5.0
>  delete mode 100644 amdgcn-amdhsa/lib/SOURCES_3.9
>  delete mode 100644 amdgcn-amdhsa/lib/SOURCES_4.0
>  delete mode 100644 amdgcn-amdhsa/lib/SOURCES_5.0
>  delete mode 100644 amdgcn-amdhsa/lib/workitem/get_global_size.39.ll
>  delete mode 100644 amdgcn-amdhsa/lib/workitem/get_global_size.50.ll
>  delete mode 100644 amdgcn-amdhsa/lib/workitem/get_local_size.39.ll
>  delete mode 100644 amdgcn-amdhsa/lib/workitem/get_local_size.50.ll
>
> diff --git a/amdgcn-amdhsa/lib/OVERRIDES b/amdgcn-amdhsa/lib/OVERRIDES
> deleted file mode 100644
> index a7a694a..0000000
> --- a/amdgcn-amdhsa/lib/OVERRIDES
> +++ /dev/null
> @@ -1,6 +0,0 @@
> -workitem/get_num_groups.ll
> -workitem/get_global_size.ll
> -workitem/get_local_size.ll
> -workitem/get_num_groups.40.ll
> -workitem/get_global_size.40.ll
> -workitem/get_local_size.40.ll
> diff --git a/amdgcn-amdhsa/lib/OVERRIDES_3.9 b/amdgcn-amdhsa/lib/OVERRIDES_3.9
> deleted file mode 100644
> index dfe9c8e..0000000
> --- a/amdgcn-amdhsa/lib/OVERRIDES_3.9
> +++ /dev/null
> @@ -1,3 +0,0 @@
> -workitem/get_global_size.cl
> -workitem/get_local_size.cl
> -workitem/get_num_groups.39.ll
> diff --git a/amdgcn-amdhsa/lib/OVERRIDES_4.0 b/amdgcn-amdhsa/lib/OVERRIDES_4.0
> deleted file mode 100644
> index ee3a48c..0000000
> --- a/amdgcn-amdhsa/lib/OVERRIDES_4.0
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -workitem/get_global_size.cl
> -workitem/get_local_size.cl
> diff --git a/amdgcn-amdhsa/lib/OVERRIDES_5.0 b/amdgcn-amdhsa/lib/OVERRIDES_5.0
> deleted file mode 100644
> index ee3a48c..0000000
> --- a/amdgcn-amdhsa/lib/OVERRIDES_5.0
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -workitem/get_global_size.cl
> -workitem/get_local_size.cl
> diff --git a/amdgcn-amdhsa/lib/SOURCES_3.9 b/amdgcn-amdhsa/lib/SOURCES_3.9
> deleted file mode 100644
> index a6a08af..0000000
> --- a/amdgcn-amdhsa/lib/SOURCES_3.9
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -workitem/get_global_size.39.ll
> -workitem/get_local_size.39.ll
> diff --git a/amdgcn-amdhsa/lib/SOURCES_4.0 b/amdgcn-amdhsa/lib/SOURCES_4.0
> deleted file mode 100644
> index 2b957ed..0000000
> --- a/amdgcn-amdhsa/lib/SOURCES_4.0
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -workitem/get_global_size.50.ll
> -workitem/get_local_size.50.ll
> diff --git a/amdgcn-amdhsa/lib/SOURCES_5.0 b/amdgcn-amdhsa/lib/SOURCES_5.0
> deleted file mode 100644
> index 2b957ed..0000000
> --- a/amdgcn-amdhsa/lib/SOURCES_5.0
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -workitem/get_global_size.50.ll
> -workitem/get_local_size.50.ll
> diff --git a/amdgcn-amdhsa/lib/workitem/get_global_size.39.ll b/amdgcn-amdhsa/lib/workitem/get_global_size.39.ll
> deleted file mode 100644
> index b5e7db2..0000000
> --- a/amdgcn-amdhsa/lib/workitem/get_global_size.39.ll
> +++ /dev/null
> @@ -1,36 +0,0 @@
> -declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
> -
> -define i32 @get_global_size(i32 %dim) #1 {
> -  %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
> -  switch i32 %dim, label %default [
> -    i32 0, label %x
> -    i32 1, label %y
> -    i32 2, label %z
> -  ]
> -
> -x:
> -  %ptr_x = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 12
> -  %ptr_x32 = bitcast i8 addrspace(2)* %ptr_x to i32 addrspace(2)*
> -  %x32 = load i32, i32 addrspace(2)* %ptr_x32, align 4, !invariant.load !0
> -  ret i32 %x32
> -
> -y:
> -  %ptr_y = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 16
> -  %ptr_y32 = bitcast i8 addrspace(2)* %ptr_y to i32 addrspace(2)*
> -  %y32 = load i32, i32 addrspace(2)* %ptr_y32, align 4, !invariant.load !0
> -  ret i32 %y32
> -
> -z:
> -  %ptr_z = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i32 20
> -  %ptr_z32 = bitcast i8 addrspace(2)* %ptr_z to i32 addrspace(2)*
> -  %z32 = load i32, i32 addrspace(2)* %ptr_z32, align 4, !invariant.load !0
> -  ret i32 %z32
> -
> -default:
> -  ret i32 1
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readonly }
> -
> -!0 = !{}
> diff --git a/amdgcn-amdhsa/lib/workitem/get_global_size.50.ll b/amdgcn-amdhsa/lib/workitem/get_global_size.50.ll
> deleted file mode 100644
> index af0f2ea..0000000
> --- a/amdgcn-amdhsa/lib/workitem/get_global_size.50.ll
> +++ /dev/null
> @@ -1,39 +0,0 @@
> -declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
> -
> -define i64 @get_global_size(i32 %dim) #1 {
> -  %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
> -  switch i32 %dim, label %default [
> -    i32 0, label %x
> -    i32 1, label %y
> -    i32 2, label %z
> -  ]
> -
> -x:
> -  %ptr_x = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 12
> -  %ptr_x32 = bitcast i8 addrspace(2)* %ptr_x to i32 addrspace(2)*
> -  %x32 = load i32, i32 addrspace(2)* %ptr_x32, align 4, !invariant.load !0
> -  %size_x = zext i32 %x32 to i64
> -  ret i64 %size_x
> -
> -y:
> -  %ptr_y = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 16
> -  %ptr_y32 = bitcast i8 addrspace(2)* %ptr_y to i32 addrspace(2)*
> -  %y32 = load i32, i32 addrspace(2)* %ptr_y32, align 4, !invariant.load !0
> -  %size_y = zext i32 %y32 to i64
> -  ret i64 %size_y
> -
> -z:
> -  %ptr_z = getelementptr inbounds i8, i8 addrspace(2)* %dispatch_ptr, i64 20
> -  %ptr_z32 = bitcast i8 addrspace(2)* %ptr_z to i32 addrspace(2)*
> -  %z32 = load i32, i32 addrspace(2)* %ptr_z32, align 4, !invariant.load !0
> -  %size_z = zext i32 %z32 to i64
> -  ret i64 %size_z
> -
> -default:
> -  ret i64 1
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readonly }
> -
> -!0 = !{}
> diff --git a/amdgcn-amdhsa/lib/workitem/get_global_size.cl b/amdgcn-amdhsa/lib/workitem/get_global_size.cl
> index 392cd08..2f95f99 100644
> --- a/amdgcn-amdhsa/lib/workitem/get_global_size.cl
> +++ b/amdgcn-amdhsa/lib/workitem/get_global_size.cl
> @@ -8,10 +8,16 @@
>  #define CONST_AS __attribute__((address_space(2)))
>  #endif
>
> +#if __clang_major__ >= 6
> +#define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
> +#else
> +#define __dispatch_ptr __clc_amdgcn_dispatch_ptr
> +CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
> +#endif
> +
>  _CLC_DEF size_t get_global_size(uint dim)
>  {
> -       CONST_AS uint * ptr =
> -               (CONST_AS uint *) __builtin_amdgcn_dispatch_ptr();
> +       CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
>         if (dim < 3)
>                 return ptr[3 + dim];
>         return 1;
> diff --git a/amdgcn-amdhsa/lib/workitem/get_local_size.39.ll b/amdgcn-amdhsa/lib/workitem/get_local_size.39.ll
> deleted file mode 100644
> index ecb5e8f..0000000
> --- a/amdgcn-amdhsa/lib/workitem/get_local_size.39.ll
> +++ /dev/null
> @@ -1,35 +0,0 @@
> -declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
> -
> -define i32 @get_local_size(i32 %dim) #1 {
> -  %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
> -  %dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
> -  %xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i32 1
> -  %xy_size = load i32, i32 addrspace(2)* %xy_size_ptr, align 4, !invariant.load !0
> -  switch i32 %dim, label %default [
> -    i32 0, label %x_dim
> -    i32 1, label %y_dim
> -    i32 2, label %z_dim
> -  ]
> -
> -x_dim:
> -  %x_size = and i32 %xy_size, 65535
> -  ret i32 %x_size
> -
> -y_dim:
> -  %y_size = lshr i32 %xy_size, 16
> -  ret i32 %y_size
> -
> -z_dim:
> -  %z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i32 2
> -  %z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1
> -  ret i32 %z_size
> -
> -default:
> -  ret i32 1
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readonly }
> -
> -!0 = !{}
> -!1 = !{ i32 0, i32 257 }
> diff --git a/amdgcn-amdhsa/lib/workitem/get_local_size.50.ll b/amdgcn-amdhsa/lib/workitem/get_local_size.50.ll
> deleted file mode 100644
> index ff4b811..0000000
> --- a/amdgcn-amdhsa/lib/workitem/get_local_size.50.ll
> +++ /dev/null
> @@ -1,38 +0,0 @@
> -declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
> -
> -define i64 @get_local_size(i32 %dim) #1 {
> -  %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
> -  %dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
> -  %xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 1
> -  %xy_size = load i32, i32 addrspace(2)* %xy_size_ptr, align 4, !invariant.load !0
> -  switch i32 %dim, label %default [
> -    i32 0, label %x_dim
> -    i32 1, label %y_dim
> -    i32 2, label %z_dim
> -  ]
> -
> -x_dim:
> -  %x_size = and i32 %xy_size, 65535
> -  %x_size.ext = zext i32 %x_size to i64
> -  ret i64 %x_size.ext
> -
> -y_dim:
> -  %y_size = lshr i32 %xy_size, 16
> -  %y_size.ext = zext i32 %y_size to i64
> -  ret i64 %y_size.ext
> -
> -z_dim:
> -  %z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 2
> -  %z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1
> -  %z_size.ext = zext i32 %z_size to i64
> -  ret i64 %z_size.ext
> -
> -default:
> -  ret i64 1
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readonly }
> -
> -!0 = !{}
> -!1 = !{ i32 0, i32 257 }
> diff --git a/amdgcn-amdhsa/lib/workitem/get_local_size.cl b/amdgcn-amdhsa/lib/workitem/get_local_size.cl
> index 64d1cf4..9f208d8 100644
> --- a/amdgcn-amdhsa/lib/workitem/get_local_size.cl
> +++ b/amdgcn-amdhsa/lib/workitem/get_local_size.cl
> @@ -8,10 +8,16 @@
>  #define CONST_AS __attribute__((address_space(2)))
>  #endif
>
> +#if __clang_major__ >= 6
> +#define __dispatch_ptr __builtin_amdgcn_dispatch_ptr
> +#else
> +#define __dispatch_ptr __clc_amdgcn_dispatch_ptr
> +CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
> +#endif
> +
>  _CLC_DEF size_t get_local_size(uint dim)
>  {
> -       CONST_AS uint * ptr =
> -               (CONST_AS uint *) __builtin_amdgcn_dispatch_ptr();
> +       CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
>         switch (dim) {
>         case 0:
>                 return ptr[1] & 0xffffu;
> --
> 2.18.1
>
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev


More information about the Libclc-dev mailing list