[Libclc-dev] [PATCH 1/4] amdgcn: Fix datalayout after clang r324101

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Mon Feb 19 13:22:33 PST 2018


On Tue, 2018-02-13 at 20:30 -0500, Jan Vesely wrote:
> clang r324101 switched around the address space (AS) numbering
> 
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>  amdgcn-amdhsa/lib/OVERRIDES                        |  3 ++
>  amdgcn/lib/OVERRIDES_3.9                           |  1 +
>  amdgcn/lib/OVERRIDES_4.0                           |  4 ++
>  amdgcn/lib/OVERRIDES_5.0                           |  4 ++
>  amdgcn/lib/OVERRIDES_6.0                           |  4 ++
>  amdgcn/lib/SOURCES                                 |  1 -
>  amdgcn/lib/SOURCES_3.9                             |  2 +
>  amdgcn/lib/SOURCES_4.0                             |  5 +++
>  amdgcn/lib/SOURCES_5.0                             |  4 ++
>  amdgcn/lib/SOURCES_6.0                             |  4 ++
>  .../minmax_helpers.39.ll                           | 49 ++++++++++++++++++++++
>  .../minmax_helpers.ll                              |  2 +-
>  amdgcn/lib/workitem/get_global_size.40.ll          | 23 ++++++++++
>  amdgcn/lib/workitem/get_global_size.ll             |  2 +-
>  amdgcn/lib/workitem/get_local_size.40.ll           | 23 ++++++++++
>  amdgcn/lib/workitem/get_local_size.ll              |  2 +-
>  amdgcn/lib/workitem/get_num_groups.40.ll           | 23 ++++++++++
>  amdgcn/lib/workitem/get_num_groups.ll              |  2 +-
>  18 files changed, 153 insertions(+), 5 deletions(-)
>  create mode 100644 amdgcn/lib/OVERRIDES_4.0
>  create mode 100644 amdgcn/lib/OVERRIDES_5.0
>  create mode 100644 amdgcn/lib/OVERRIDES_6.0
>  create mode 100644 amdgcn/lib/SOURCES_4.0
>  create mode 100644 amdgcn/lib/SOURCES_5.0
>  create mode 100644 amdgcn/lib/SOURCES_6.0
>  create mode 100644 amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>  create mode 100644 amdgcn/lib/workitem/get_global_size.40.ll
>  create mode 100644 amdgcn/lib/workitem/get_local_size.40.ll
>  create mode 100644 amdgcn/lib/workitem/get_num_groups.40.ll
> 
> diff --git a/amdgcn-amdhsa/lib/OVERRIDES b/amdgcn-amdhsa/lib/OVERRIDES
> index c9bd69b..18ce3f8 100644
> --- a/amdgcn-amdhsa/lib/OVERRIDES
> +++ b/amdgcn-amdhsa/lib/OVERRIDES
> @@ -1 +1,4 @@
>  workitem/get_num_groups.ll
> +workitem/get_num_groups.40.ll
> +workitem/get_global_size.40.ll
> +workitem/get_local_size.40.ll
> diff --git a/amdgcn/lib/OVERRIDES_3.9 b/amdgcn/lib/OVERRIDES_3.9
> index f26fbbe..3268f67 100644
> --- a/amdgcn/lib/OVERRIDES_3.9
> +++ b/amdgcn/lib/OVERRIDES_3.9
> @@ -1,3 +1,4 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.ll
>  workitem/get_global_size.ll
>  workitem/get_local_size.ll
>  workitem/get_num_groups.ll
> diff --git a/amdgcn/lib/OVERRIDES_4.0 b/amdgcn/lib/OVERRIDES_4.0
> new file mode 100644
> index 0000000..3268f67
> --- /dev/null
> +++ b/amdgcn/lib/OVERRIDES_4.0
> @@ -0,0 +1,4 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.ll
> +workitem/get_global_size.ll
> +workitem/get_local_size.ll
> +workitem/get_num_groups.ll
> diff --git a/amdgcn/lib/OVERRIDES_5.0 b/amdgcn/lib/OVERRIDES_5.0
> new file mode 100644
> index 0000000..3268f67
> --- /dev/null
> +++ b/amdgcn/lib/OVERRIDES_5.0
> @@ -0,0 +1,4 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.ll
> +workitem/get_global_size.ll
> +workitem/get_local_size.ll
> +workitem/get_num_groups.ll
> diff --git a/amdgcn/lib/OVERRIDES_6.0 b/amdgcn/lib/OVERRIDES_6.0
> new file mode 100644
> index 0000000..3268f67
> --- /dev/null
> +++ b/amdgcn/lib/OVERRIDES_6.0
> @@ -0,0 +1,4 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.ll
> +workitem/get_global_size.ll
> +workitem/get_local_size.ll
> +workitem/get_num_groups.ll
> diff --git a/amdgcn/lib/SOURCES b/amdgcn/lib/SOURCES
> index 8c177bd..8e14ce2 100644
> --- a/amdgcn/lib/SOURCES
> +++ b/amdgcn/lib/SOURCES
> @@ -1,7 +1,6 @@
>  cl_khr_int64_extended_atomics/minmax_helpers.ll
>  math/ldexp.cl
>  mem_fence/fence.cl
> -mem_fence/waitcnt.ll
>  synchronization/barrier.cl
>  workitem/get_global_offset.cl
>  workitem/get_group_id.cl
> diff --git a/amdgcn/lib/SOURCES_3.9 b/amdgcn/lib/SOURCES_3.9
> index 6ee7cb8..3cecdb0 100644
> --- a/amdgcn/lib/SOURCES_3.9
> +++ b/amdgcn/lib/SOURCES_3.9
> @@ -1,3 +1,5 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
> +mem_fence/waitcnt.ll
>  workitem/get_global_size.39.ll
>  workitem/get_local_size.39.ll
>  workitem/get_num_groups.39.ll
> diff --git a/amdgcn/lib/SOURCES_4.0 b/amdgcn/lib/SOURCES_4.0
> new file mode 100644
> index 0000000..5ed1d7c
> --- /dev/null
> +++ b/amdgcn/lib/SOURCES_4.0
> @@ -0,0 +1,5 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
> +mem_fence/waitcnt.ll
> +workitem/get_global_size.40.ll
> +workitem/get_local_size.40.ll
> +workitem/get_num_groups.40.ll
> diff --git a/amdgcn/lib/SOURCES_5.0 b/amdgcn/lib/SOURCES_5.0
> new file mode 100644
> index 0000000..45c51ec
> --- /dev/null
> +++ b/amdgcn/lib/SOURCES_5.0
> @@ -0,0 +1,4 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
> +workitem/get_global_size.40.ll
> +workitem/get_local_size.40.ll
> +workitem/get_num_groups.40.ll
> diff --git a/amdgcn/lib/SOURCES_6.0 b/amdgcn/lib/SOURCES_6.0
> new file mode 100644
> index 0000000..45c51ec
> --- /dev/null
> +++ b/amdgcn/lib/SOURCES_6.0
> @@ -0,0 +1,4 @@
> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
> +workitem/get_global_size.40.ll
> +workitem/get_local_size.40.ll
> +workitem/get_num_groups.40.ll
> diff --git a/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll
> new file mode 100644
> index 0000000..ec14179
> --- /dev/null
> +++ b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll
> @@ -0,0 +1,49 @@
> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +
> +define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> +
> +define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> +
> +define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> +
> +define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> +
> +define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> +
> +define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> +
> +define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> +
> +define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {
> +entry:
> +  %0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
> +  ret i64 %0
> +}
> diff --git a/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll
> index ec14179..509d535 100644
> --- a/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll
> +++ b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll
> @@ -1,4 +1,4 @@
> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>  
>  define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
>  entry:
> diff --git a/amdgcn/lib/workitem/get_global_size.40.ll b/amdgcn/lib/workitem/get_global_size.40.ll
> new file mode 100644
> index 0000000..3d26d2f
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_global_size.40.ll
> @@ -0,0 +1,23 @@
> +declare i32 @llvm.r600.read.global.size.x() nounwind readnone
> +declare i32 @llvm.r600.read.global.size.y() nounwind readnone
> +declare i32 @llvm.r600.read.global.size.z() nounwind readnone
> +
> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +
> +define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
> +  switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> +x_dim:
> +  %x = call i32 @llvm.r600.read.global.size.x()
> +  %x.ext = zext i32 %x to i64
> +  ret i64 %x.ext
> +y_dim:
> +  %y = call i32 @llvm.r600.read.global.size.y()
> +  %y.ext = zext i32 %y to i64
> +  ret i64 %y.ext
> +z_dim:
> +  %z = call i32 @llvm.r600.read.global.size.z()
> +  %z.ext = zext i32 %z to i64
> +  ret i64 %z.ext
> +default:
> +  ret i64 1
> +}
> diff --git a/amdgcn/lib/workitem/get_global_size.ll b/amdgcn/lib/workitem/get_global_size.ll
> index 3d26d2f..3292af5 100644
> --- a/amdgcn/lib/workitem/get_global_size.ll
> +++ b/amdgcn/lib/workitem/get_global_size.ll
> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.global.size.x() nounwind readnone
>  declare i32 @llvm.r600.read.global.size.y() nounwind readnone
>  declare i32 @llvm.r600.read.global.size.z() nounwind readnone
>  
> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>  
>  define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
>    switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> diff --git a/amdgcn/lib/workitem/get_local_size.40.ll b/amdgcn/lib/workitem/get_local_size.40.ll
> new file mode 100644
> index 0000000..36141f9
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_local_size.40.ll
> @@ -0,0 +1,23 @@
> +declare i32 @llvm.r600.read.local.size.x() nounwind readnone
> +declare i32 @llvm.r600.read.local.size.y() nounwind readnone
> +declare i32 @llvm.r600.read.local.size.z() nounwind readnone
> +
> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +
> +define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
> +  switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> +x_dim:
> +  %x = call i32 @llvm.r600.read.local.size.x()
> +  %x.ext = zext i32 %x to i64
> +  ret i64 %x.ext
> +y_dim:
> +  %y = call i32 @llvm.r600.read.local.size.y()
> +  %y.ext = zext i32 %y to i64
> +  ret i64 %y.ext
> +z_dim:
> +  %z = call i32 @llvm.r600.read.local.size.z()
> +  %z.ext = zext i32 %z to i64
> +  ret i64 %z.ext
> +default:
> +  ret i64 1
> +}
> diff --git a/amdgcn/lib/workitem/get_local_size.ll b/amdgcn/lib/workitem/get_local_size.ll
> index 36141f9..1b5fec1 100644
> --- a/amdgcn/lib/workitem/get_local_size.ll
> +++ b/amdgcn/lib/workitem/get_local_size.ll
> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.local.size.x() nounwind readnone
>  declare i32 @llvm.r600.read.local.size.y() nounwind readnone
>  declare i32 @llvm.r600.read.local.size.z() nounwind readnone
>  
> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>  
>  define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
>    switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> diff --git a/amdgcn/lib/workitem/get_num_groups.40.ll b/amdgcn/lib/workitem/get_num_groups.40.ll
> new file mode 100644
> index 0000000..12ec8ea
> --- /dev/null
> +++ b/amdgcn/lib/workitem/get_num_groups.40.ll
> @@ -0,0 +1,23 @@
> +declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
> +declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
> +declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
> +
> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +
> +define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
> +  switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]
> +x_dim:
> +  %x = call i32 @llvm.r600.read.ngroups.x()
> +  %x.ext = zext i32 %x to i64
> +  ret i64 %x.ext
> +y_dim:
> +  %y = call i32 @llvm.r600.read.ngroups.y()
> +  %y.ext = zext i32 %y to i64
> +  ret i64 %y.ext
> +z_dim:
> +  %z = call i32 @llvm.r600.read.ngroups.z()
> +  %z.ext = zext i32 %z to i64
> +  ret i64 %z.ext
> +default:
> +  ret i64 1
> +}
> diff --git a/amdgcn/lib/workitem/get_num_groups.ll b/amdgcn/lib/workitem/get_num_groups.ll
> index 12ec8ea..8fb55ed 100644
> --- a/amdgcn/lib/workitem/get_num_groups.ll
> +++ b/amdgcn/lib/workitem/get_num_groups.ll
> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
>  declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
>  declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
>  
> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>  
>  define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
>    switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]

ping
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20180219/c798fa90/attachment.sig>


More information about the Libclc-dev mailing list