[Libclc-dev] [PATCH v2 1/2] AMDGPU: Use clang intrinsics for workitem builtins
Tom Stellard via Libclc-dev
libclc-dev at lists.llvm.org
Wed Jun 1 19:00:25 PDT 2016
LGTM.
On Wed, Jun 01, 2016 at 12:12:34PM -0400, Jan Vesely via Libclc-dev wrote:
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>
> Depends on http://reviews.llvm.org/D20299
>
> amdgcn/lib/SOURCES | 8 ++++++--
> amdgcn/lib/workitem/get_global_size.ll | 18 ++++++++++++++++++
> amdgcn/lib/workitem/get_group_id.cl | 11 +++++++++++
> amdgcn/lib/workitem/get_group_id.ll | 29 -----------------------------
> amdgcn/lib/workitem/get_local_id.cl | 11 +++++++++++
> amdgcn/lib/workitem/get_local_id.ll | 31 -------------------------------
> amdgcn/lib/workitem/get_local_size.ll | 18 ++++++++++++++++++
> amdgcn/lib/workitem/get_num_groups.ll | 18 ++++++++++++++++++
> amdgcn/lib/workitem/get_work_dim.cl | 7 +++++++
> amdgpu/lib/SOURCES | 4 ----
> amdgpu/lib/workitem/get_global_size.ll | 18 ------------------
> amdgpu/lib/workitem/get_local_size.ll | 18 ------------------
> amdgpu/lib/workitem/get_num_groups.ll | 18 ------------------
> amdgpu/lib/workitem/get_work_dim.ll | 8 --------
> r600/lib/SOURCES | 8 ++++++--
> r600/lib/workitem/get_global_size.cl | 11 +++++++++++
> r600/lib/workitem/get_group_id.cl | 11 +++++++++++
> r600/lib/workitem/get_group_id.ll | 29 -----------------------------
> r600/lib/workitem/get_local_id.cl | 11 +++++++++++
> r600/lib/workitem/get_local_id.ll | 31 -------------------------------
> r600/lib/workitem/get_local_size.cl | 11 +++++++++++
> r600/lib/workitem/get_num_groups.cl | 11 +++++++++++
> r600/lib/workitem/get_work_dim.cl | 6 ++++++
> 23 files changed, 156 insertions(+), 190 deletions(-)
> create mode 100644 amdgcn/lib/workitem/get_global_size.ll
> create mode 100644 amdgcn/lib/workitem/get_group_id.cl
> delete mode 100644 amdgcn/lib/workitem/get_group_id.ll
> create mode 100644 amdgcn/lib/workitem/get_local_id.cl
> delete mode 100644 amdgcn/lib/workitem/get_local_id.ll
> create mode 100644 amdgcn/lib/workitem/get_local_size.ll
> create mode 100644 amdgcn/lib/workitem/get_num_groups.ll
> create mode 100644 amdgcn/lib/workitem/get_work_dim.cl
> delete mode 100644 amdgpu/lib/workitem/get_global_size.ll
> delete mode 100644 amdgpu/lib/workitem/get_local_size.ll
> delete mode 100644 amdgpu/lib/workitem/get_num_groups.ll
> delete mode 100644 amdgpu/lib/workitem/get_work_dim.ll
> create mode 100644 r600/lib/workitem/get_global_size.cl
> create mode 100644 r600/lib/workitem/get_group_id.cl
> delete mode 100644 r600/lib/workitem/get_group_id.ll
> create mode 100644 r600/lib/workitem/get_local_id.cl
> delete mode 100644 r600/lib/workitem/get_local_id.ll
> create mode 100644 r600/lib/workitem/get_local_size.cl
> create mode 100644 r600/lib/workitem/get_num_groups.cl
> create mode 100644 r600/lib/workitem/get_work_dim.cl
>
> diff --git a/amdgcn/lib/SOURCES b/amdgcn/lib/SOURCES
> index 49c8dd5..b7e1d98 100644
> --- a/amdgcn/lib/SOURCES
> +++ b/amdgcn/lib/SOURCES
> @@ -1,3 +1,7 @@
> synchronization/barrier_impl.ll
> -workitem/get_group_id.ll
> -workitem/get_local_id.ll
> +workitem/get_group_id.cl
> +workitem/get_local_id.cl
> +workitem/get_work_dim.cl
> +workitem/get_num_groups.ll
> +workitem/get_local_size.ll
> +workitem/get_global_size.ll
> diff --git a/amdgcn/lib/workitem/get_global_size.ll b/amdgcn/lib/workitem/get_global_size.ll
> new file mode 100644
> index 0000000..ac2d08d
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_global_size.ll
> @@ -0,0 +1,18 @@
> +declare i32 @llvm.r600.read.global.size.x() nounwind readnone
> +declare i32 @llvm.r600.read.global.size.y() nounwind readnone
> +declare i32 @llvm.r600.read.global.size.z() nounwind readnone
> +
> +define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
> + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> +x_dim:
> + %x = call i32 @llvm.r600.read.global.size.x() nounwind readnone
> + ret i32 %x
> +y_dim:
> + %y = call i32 @llvm.r600.read.global.size.y() nounwind readnone
> + ret i32 %y
> +z_dim:
> + %z = call i32 @llvm.r600.read.global.size.z() nounwind readnone
> + ret i32 %z
> +default:
> + ret i32 0
> +}
> diff --git a/amdgcn/lib/workitem/get_group_id.cl b/amdgcn/lib/workitem/get_group_id.cl
> new file mode 100644
> index 0000000..4b4e7a7
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_group_id.cl
> @@ -0,0 +1,11 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_group_id(uint dim)
> +{
> + switch(dim) {
> + case 0: return __builtin_amdgcn_workgroup_id_x();
> + case 1: return __builtin_amdgcn_workgroup_id_y();
> + case 2: return __builtin_amdgcn_workgroup_id_z();
> + default: return 1;
> + }
> +}
> diff --git a/amdgcn/lib/workitem/get_group_id.ll b/amdgcn/lib/workitem/get_group_id.ll
> deleted file mode 100644
> index 9d820e0..0000000
> --- a/amdgcn/lib/workitem/get_group_id.ll
> +++ /dev/null
> @@ -1,29 +0,0 @@
> -declare i32 @llvm.amdgcn.workgroup.id.x() #0
> -declare i32 @llvm.amdgcn.workgroup.id.y() #0
> -declare i32 @llvm.amdgcn.workgroup.id.z() #0
> -
> -define i32 @get_group_id(i32 %dim) #1 {
> - switch i32 %dim, label %default [
> - i32 0, label %x_dim
> - i32 1, label %y_dim
> - i32 2, label %z_dim
> - ]
> -
> -x_dim:
> - %x = tail call i32 @llvm.amdgcn.workgroup.id.x()
> - ret i32 %x
> -
> -y_dim:
> - %y = tail call i32 @llvm.amdgcn.workgroup.id.y()
> - ret i32 %y
> -
> -z_dim:
> - %z = tail call i32 @llvm.amdgcn.workgroup.id.z()
> - ret i32 %z
> -
> -default:
> - ret i32 0
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readnone }
> diff --git a/amdgcn/lib/workitem/get_local_id.cl b/amdgcn/lib/workitem/get_local_id.cl
> new file mode 100644
> index 0000000..257c30f
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_local_id.cl
> @@ -0,0 +1,11 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_local_id(uint dim)
> +{
> + switch(dim) {
> + case 0: return __builtin_amdgcn_workitem_id_x();
> + case 1: return __builtin_amdgcn_workitem_id_y();
> + case 2: return __builtin_amdgcn_workitem_id_z();
> + default: return 1;
> + }
> +}
> diff --git a/amdgcn/lib/workitem/get_local_id.ll b/amdgcn/lib/workitem/get_local_id.ll
> deleted file mode 100644
> index c54291c..0000000
> --- a/amdgcn/lib/workitem/get_local_id.ll
> +++ /dev/null
> @@ -1,31 +0,0 @@
> -declare i32 @llvm.amdgcn.workitem.id.x() #0
> -declare i32 @llvm.amdgcn.workitem.id.y() #0
> -declare i32 @llvm.amdgcn.workitem.id.z() #0
> -
> -define i32 @get_local_id(i32 %dim) #1 {
> - switch i32 %dim, label %default [
> - i32 0, label %x_dim
> - i32 1, label %y_dim
> - i32 2, label %z_dim
> - ]
> -
> -x_dim:
> - %x = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
> - ret i32 %x
> -
> -y_dim:
> - %y = tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
> - ret i32 %y
> -
> -z_dim:
> - %z = tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
> - ret i32 %z
> -
> -default:
> - ret i32 0
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readnone }
> -
> -!0 = !{ i32 0, i32 2048 }
> diff --git a/amdgcn/lib/workitem/get_local_size.ll b/amdgcn/lib/workitem/get_local_size.ll
> new file mode 100644
> index 0000000..0a98de6
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_local_size.ll
> @@ -0,0 +1,18 @@
> +declare i32 @llvm.r600.read.local.size.x() nounwind readnone
> +declare i32 @llvm.r600.read.local.size.y() nounwind readnone
> +declare i32 @llvm.r600.read.local.size.z() nounwind readnone
> +
> +define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
> + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> +x_dim:
> + %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone
> + ret i32 %x
> +y_dim:
> + %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone
> + ret i32 %y
> +z_dim:
> + %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone
> + ret i32 %z
> +default:
> + ret i32 0
> +}
> diff --git a/amdgcn/lib/workitem/get_num_groups.ll b/amdgcn/lib/workitem/get_num_groups.ll
> new file mode 100644
> index 0000000..a708f42
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_num_groups.ll
> @@ -0,0 +1,18 @@
> +declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
> +declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
> +declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
> +
> +define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
> + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> +x_dim:
> + %x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone
> + ret i32 %x
> +y_dim:
> + %y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone
> + ret i32 %y
> +z_dim:
> + %z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone
> + ret i32 %z
> +default:
> + ret i32 0
> +}
> diff --git a/amdgcn/lib/workitem/get_work_dim.cl b/amdgcn/lib/workitem/get_work_dim.cl
> new file mode 100644
> index 0000000..ed352b2
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_work_dim.cl
> @@ -0,0 +1,7 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_work_dim()
> +{
> + __global uint * ptr = __builtin_amdgcn_implicitarg_ptr();
> + return *ptr;
> +}
> diff --git a/amdgpu/lib/SOURCES b/amdgpu/lib/SOURCES
> index 0f99fe1..7a2d384 100644
> --- a/amdgpu/lib/SOURCES
> +++ b/amdgpu/lib/SOURCES
> @@ -2,10 +2,6 @@ atomic/atomic.cl
> math/ldexp.cl
> math/nextafter.cl
> math/sqrt.cl
> -workitem/get_num_groups.ll
> -workitem/get_local_size.ll
> -workitem/get_global_size.ll
> -workitem/get_work_dim.ll
> synchronization/barrier.cl
> image/get_image_width.cl
> image/get_image_height.cl
> diff --git a/amdgpu/lib/workitem/get_global_size.ll b/amdgpu/lib/workitem/get_global_size.ll
> deleted file mode 100644
> index ac2d08d..0000000
> --- a/amdgpu/lib/workitem/get_global_size.ll
> +++ /dev/null
> @@ -1,18 +0,0 @@
> -declare i32 @llvm.r600.read.global.size.x() nounwind readnone
> -declare i32 @llvm.r600.read.global.size.y() nounwind readnone
> -declare i32 @llvm.r600.read.global.size.z() nounwind readnone
> -
> -define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
> - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> -x_dim:
> - %x = call i32 @llvm.r600.read.global.size.x() nounwind readnone
> - ret i32 %x
> -y_dim:
> - %y = call i32 @llvm.r600.read.global.size.y() nounwind readnone
> - ret i32 %y
> -z_dim:
> - %z = call i32 @llvm.r600.read.global.size.z() nounwind readnone
> - ret i32 %z
> -default:
> - ret i32 0
> -}
> diff --git a/amdgpu/lib/workitem/get_local_size.ll b/amdgpu/lib/workitem/get_local_size.ll
> deleted file mode 100644
> index 0a98de6..0000000
> --- a/amdgpu/lib/workitem/get_local_size.ll
> +++ /dev/null
> @@ -1,18 +0,0 @@
> -declare i32 @llvm.r600.read.local.size.x() nounwind readnone
> -declare i32 @llvm.r600.read.local.size.y() nounwind readnone
> -declare i32 @llvm.r600.read.local.size.z() nounwind readnone
> -
> -define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
> - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> -x_dim:
> - %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone
> - ret i32 %x
> -y_dim:
> - %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone
> - ret i32 %y
> -z_dim:
> - %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone
> - ret i32 %z
> -default:
> - ret i32 0
> -}
> diff --git a/amdgpu/lib/workitem/get_num_groups.ll b/amdgpu/lib/workitem/get_num_groups.ll
> deleted file mode 100644
> index a708f42..0000000
> --- a/amdgpu/lib/workitem/get_num_groups.ll
> +++ /dev/null
> @@ -1,18 +0,0 @@
> -declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
> -declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
> -declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
> -
> -define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
> - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> -x_dim:
> - %x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone
> - ret i32 %x
> -y_dim:
> - %y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone
> - ret i32 %y
> -z_dim:
> - %z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone
> - ret i32 %z
> -default:
> - ret i32 0
> -}
> diff --git a/amdgpu/lib/workitem/get_work_dim.ll b/amdgpu/lib/workitem/get_work_dim.ll
> deleted file mode 100644
> index 1f86b5e..0000000
> --- a/amdgpu/lib/workitem/get_work_dim.ll
> +++ /dev/null
> @@ -1,8 +0,0 @@
> -declare i32 @llvm.AMDGPU.read.workdim() nounwind readnone
> -
> -define i32 @get_work_dim() nounwind readnone alwaysinline {
> - %x = call i32 @llvm.AMDGPU.read.workdim() nounwind readnone , !range !0
> - ret i32 %x
> -}
> -
> -!0 = !{ i32 1, i32 4 }
> diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES
> index 49c8dd5..1859b98 100644
> --- a/r600/lib/SOURCES
> +++ b/r600/lib/SOURCES
> @@ -1,3 +1,7 @@
> synchronization/barrier_impl.ll
> -workitem/get_group_id.ll
> -workitem/get_local_id.ll
> +workitem/get_global_size.cl
> +workitem/get_group_id.cl
> +workitem/get_local_id.cl
> +workitem/get_local_size.cl
> +workitem/get_num_groups.cl
> +workitem/get_work_dim.cl
> diff --git a/r600/lib/workitem/get_global_size.cl b/r600/lib/workitem/get_global_size.cl
> new file mode 100644
> index 0000000..8407737
> --- /dev/null
> +++ b/r600/lib/workitem/get_global_size.cl
> @@ -0,0 +1,11 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_global_size(uint dim)
> +{
> + switch(dim) {
> + case 0: return __builtin_r600_read_global_size_x();
> + case 1: return __builtin_r600_read_global_size_y();
> + case 2: return __builtin_r600_read_global_size_z();
> + default: return 1;
> + }
> +}
> diff --git a/r600/lib/workitem/get_group_id.cl b/r600/lib/workitem/get_group_id.cl
> new file mode 100644
> index 0000000..e5efc0a
> --- /dev/null
> +++ b/r600/lib/workitem/get_group_id.cl
> @@ -0,0 +1,11 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_group_id(uint dim)
> +{
> + switch(dim) {
> + case 0: return __builtin_r600_read_tgid_x();
> + case 1: return __builtin_r600_read_tgid_y();
> + case 2: return __builtin_r600_read_tgid_z();
> + default: return 1;
> + }
> +}
> diff --git a/r600/lib/workitem/get_group_id.ll b/r600/lib/workitem/get_group_id.ll
> deleted file mode 100644
> index 837c799..0000000
> --- a/r600/lib/workitem/get_group_id.ll
> +++ /dev/null
> @@ -1,29 +0,0 @@
> -declare i32 @llvm.r600.read.tgid.x() #0
> -declare i32 @llvm.r600.read.tgid.y() #0
> -declare i32 @llvm.r600.read.tgid.z() #0
> -
> -define i32 @get_group_id(i32 %dim) #1 {
> - switch i32 %dim, label %default [
> - i32 0, label %x_dim
> - i32 1, label %y_dim
> - i32 2, label %z_dim
> - ]
> -
> -x_dim:
> - %x = tail call i32 @llvm.r600.read.tgid.x()
> - ret i32 %x
> -
> -y_dim:
> - %y = tail call i32 @llvm.r600.read.tgid.y()
> - ret i32 %y
> -
> -z_dim:
> - %z = tail call i32 @llvm.r600.read.tgid.z()
> - ret i32 %z
> -
> -default:
> - ret i32 0
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readnone }
> diff --git a/r600/lib/workitem/get_local_id.cl b/r600/lib/workitem/get_local_id.cl
> new file mode 100644
> index 0000000..a871a5d
> --- /dev/null
> +++ b/r600/lib/workitem/get_local_id.cl
> @@ -0,0 +1,11 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_local_id(uint dim)
> +{
> + switch(dim) {
> + case 0: return __builtin_r600_read_tidig_x();
> + case 1: return __builtin_r600_read_tidig_y();
> + case 2: return __builtin_r600_read_tidig_z();
> + default: return 1;
> + }
> +}
> diff --git a/r600/lib/workitem/get_local_id.ll b/r600/lib/workitem/get_local_id.ll
> deleted file mode 100644
> index da37ca0..0000000
> --- a/r600/lib/workitem/get_local_id.ll
> +++ /dev/null
> @@ -1,31 +0,0 @@
> -declare i32 @llvm.r600.read.tidig.x() #0
> -declare i32 @llvm.r600.read.tidig.y() #0
> -declare i32 @llvm.r600.read.tidig.z() #0
> -
> -define i32 @get_local_id(i32 %dim) #1 {
> - switch i32 %dim, label %default [
> - i32 0, label %x_dim
> - i32 1, label %y_dim
> - i32 2, label %z_dim
> - ]
> -
> -x_dim:
> - %x = tail call i32 @llvm.r600.read.tidig.x(), !range !0
> - ret i32 %x
> -
> -y_dim:
> - %y = tail call i32 @llvm.r600.read.tidig.y(), !range !0
> - ret i32 %y
> -z_dim:
> -
> - %z = tail call i32 @llvm.r600.read.tidig.z(), !range !0
> - ret i32 %z
> -
> -default:
> - ret i32 0
> -}
> -
> -attributes #0 = { nounwind readnone }
> -attributes #1 = { alwaysinline norecurse nounwind readnone }
> -
> -!0 = !{ i32 0, i32 2048 }
> diff --git a/r600/lib/workitem/get_local_size.cl b/r600/lib/workitem/get_local_size.cl
> new file mode 100644
> index 0000000..4c27ef5
> --- /dev/null
> +++ b/r600/lib/workitem/get_local_size.cl
> @@ -0,0 +1,11 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_local_size(uint dim)
> +{
> + switch(dim) {
> + case 0: return __builtin_r600_read_local_size_x();
> + case 1: return __builtin_r600_read_local_size_y();
> + case 2: return __builtin_r600_read_local_size_z();
> + default: return 1;
> + }
> +}
> diff --git a/r600/lib/workitem/get_num_groups.cl b/r600/lib/workitem/get_num_groups.cl
> new file mode 100644
> index 0000000..dd1db75
> --- /dev/null
> +++ b/r600/lib/workitem/get_num_groups.cl
> @@ -0,0 +1,11 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_num_groups(uint dim)
> +{
> + switch(dim) {
> + case 0: return __builtin_r600_read_ngroups_x();
> + case 1: return __builtin_r600_read_ngroups_y();
> + case 2: return __builtin_r600_read_ngroups_z();
> + default: return 1;
> + }
> +}
> diff --git a/r600/lib/workitem/get_work_dim.cl b/r600/lib/workitem/get_work_dim.cl
> new file mode 100644
> index 0000000..65493e7
> --- /dev/null
> +++ b/r600/lib/workitem/get_work_dim.cl
> @@ -0,0 +1,6 @@
> +#include <clc/clc.h>
> +
> +_CLC_DEF uint get_work_dim()
> +{
> + return __builtin_r600_read_workdim();
> +}
> --
> 2.5.5
>
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
More information about the Libclc-dev
mailing list