[Libclc-dev] [PATCH 1/4] amdgcn: Fix datalayout after clang r324101
Aaron Watry via Libclc-dev
libclc-dev at lists.llvm.org
Tue Feb 20 19:11:17 PST 2018
On Mon, Feb 19, 2018 at 3:22 PM, Jan Vesely via Libclc-dev <libclc-dev at lists.llvm.org> wrote:
> On Tue, 2018-02-13 at 20:30 -0500, Jan Vesely wrote:
>> r324101 switched around AS numbering
>>
>> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
>> ---
>> amdgcn-amdhsa/lib/OVERRIDES | 3 ++
>> amdgcn/lib/OVERRIDES_3.9 | 1 +
>> amdgcn/lib/OVERRIDES_4.0 | 4 ++
>> amdgcn/lib/OVERRIDES_5.0 | 4 ++
>> amdgcn/lib/OVERRIDES_6.0 | 4 ++
>> amdgcn/lib/SOURCES | 1 -
>> amdgcn/lib/SOURCES_3.9 | 2 +
>> amdgcn/lib/SOURCES_4.0 | 5 +++
>> amdgcn/lib/SOURCES_5.0 | 4 ++
>> amdgcn/lib/SOURCES_6.0 | 4 ++
>> .../minmax_helpers.39.ll | 49 ++++++++++++++++++++++
>> .../minmax_helpers.ll | 2 +-
>> amdgcn/lib/workitem/get_global_size.40.ll | 23 ++++++++++
>> amdgcn/lib/workitem/get_global_size.ll | 2 +-
>> amdgcn/lib/workitem/get_local_size.40.ll | 23 ++++++++++
>> amdgcn/lib/workitem/get_local_size.ll | 2 +-
>> amdgcn/lib/workitem/get_num_groups.40.ll | 23 ++++++++++
>> amdgcn/lib/workitem/get_num_groups.ll | 2 +-
>> 18 files changed, 153 insertions(+), 5 deletions(-)
>> create mode 100644 amdgcn/lib/OVERRIDES_4.0
>> create mode 100644 amdgcn/lib/OVERRIDES_5.0
>> create mode 100644 amdgcn/lib/OVERRIDES_6.0
>> create mode 100644 amdgcn/lib/SOURCES_4.0
>> create mode 100644 amdgcn/lib/SOURCES_5.0
>> create mode 100644 amdgcn/lib/SOURCES_6.0
>> create mode 100644 amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>> create mode 100644 amdgcn/lib/workitem/get_global_size.40.ll
>> create mode 100644 amdgcn/lib/workitem/get_local_size.40.ll
>> create mode 100644 amdgcn/lib/workitem/get_num_groups.40.ll
>>
>> diff --git a/amdgcn-amdhsa/lib/OVERRIDES b/amdgcn-amdhsa/lib/OVERRIDES
>> index c9bd69b..18ce3f8 100644
>> --- a/amdgcn-amdhsa/lib/OVERRIDES
>> +++ b/amdgcn-amdhsa/lib/OVERRIDES
>> @@ -1 +1,4 @@
>> workitem/get_num_groups.ll
>> +workitem/get_num_groups.40.ll
>> +workitem/get_global_size.40.ll
>> +workitem/get_local_size.40.ll
>> diff --git a/amdgcn/lib/OVERRIDES_3.9 b/amdgcn/lib/OVERRIDES_3.9
>> index f26fbbe..3268f67 100644
>> --- a/amdgcn/lib/OVERRIDES_3.9
>> +++ b/amdgcn/lib/OVERRIDES_3.9
>> @@ -1,3 +1,4 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.ll
>> workitem/get_global_size.ll
>> workitem/get_local_size.ll
>> workitem/get_num_groups.ll
>> diff --git a/amdgcn/lib/OVERRIDES_4.0 b/amdgcn/lib/OVERRIDES_4.0
>> new file mode 100644
>> index 0000000..3268f67
>> --- /dev/null
>> +++ b/amdgcn/lib/OVERRIDES_4.0
>> @@ -0,0 +1,4 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.ll
>> +workitem/get_global_size.ll
>> +workitem/get_local_size.ll
>> +workitem/get_num_groups.ll
>> diff --git a/amdgcn/lib/OVERRIDES_5.0 b/amdgcn/lib/OVERRIDES_5.0
>> new file mode 100644
>> index 0000000..3268f67
>> --- /dev/null
>> +++ b/amdgcn/lib/OVERRIDES_5.0
>> @@ -0,0 +1,4 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.ll
>> +workitem/get_global_size.ll
>> +workitem/get_local_size.ll
>> +workitem/get_num_groups.ll
>> diff --git a/amdgcn/lib/OVERRIDES_6.0 b/amdgcn/lib/OVERRIDES_6.0
>> new file mode 100644
>> index 0000000..3268f67
>> --- /dev/null
>> +++ b/amdgcn/lib/OVERRIDES_6.0
>> @@ -0,0 +1,4 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.ll
>> +workitem/get_global_size.ll
>> +workitem/get_local_size.ll
>> +workitem/get_num_groups.ll
>> diff --git a/amdgcn/lib/SOURCES b/amdgcn/lib/SOURCES
>> index 8c177bd..8e14ce2 100644
>> --- a/amdgcn/lib/SOURCES
>> +++ b/amdgcn/lib/SOURCES
>> @@ -1,7 +1,6 @@
>> cl_khr_int64_extended_atomics/minmax_helpers.ll
>> math/ldexp.cl
>> mem_fence/fence.cl
>> -mem_fence/waitcnt.ll
>> synchronization/barrier.cl
>> workitem/get_global_offset.cl
>> workitem/get_group_id.cl
>> diff --git a/amdgcn/lib/SOURCES_3.9 b/amdgcn/lib/SOURCES_3.9
>> index 6ee7cb8..3cecdb0 100644
>> --- a/amdgcn/lib/SOURCES_3.9
>> +++ b/amdgcn/lib/SOURCES_3.9
>> @@ -1,3 +1,5 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>> +mem_fence/waitcnt.ll
>> workitem/get_global_size.39.ll
>> workitem/get_local_size.39.ll
>> workitem/get_num_groups.39.ll
>> diff --git a/amdgcn/lib/SOURCES_4.0 b/amdgcn/lib/SOURCES_4.0
>> new file mode 100644
>> index 0000000..5ed1d7c
>> --- /dev/null
>> +++ b/amdgcn/lib/SOURCES_4.0
>> @@ -0,0 +1,5 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>> +mem_fence/waitcnt.ll
>> +workitem/get_global_size.40.ll
>> +workitem/get_local_size.40.ll
>> +workitem/get_num_groups.40.ll
>> diff --git a/amdgcn/lib/SOURCES_5.0 b/amdgcn/lib/SOURCES_5.0
>> new file mode 100644
>> index 0000000..45c51ec
>> --- /dev/null
>> +++ b/amdgcn/lib/SOURCES_5.0
>> @@ -0,0 +1,4 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>> +workitem/get_global_size.40.ll
>> +workitem/get_local_size.40.ll
>> +workitem/get_num_groups.40.ll
>> diff --git a/amdgcn/lib/SOURCES_6.0 b/amdgcn/lib/SOURCES_6.0
>> new file mode 100644
>> index 0000000..45c51ec
>> --- /dev/null
>> +++ b/amdgcn/lib/SOURCES_6.0
>> @@ -0,0 +1,4 @@
>> +cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>> +workitem/get_global_size.40.ll
>> +workitem/get_local_size.40.ll
>> +workitem/get_num_groups.40.ll
>> diff --git a/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll
b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>> new file mode 100644
>> index 0000000..ec14179
>> --- /dev/null
>> +++ b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.39.ll
>> @@ -0,0 +1,49 @@
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +
>> +define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
>> + ret i64 %0
>> +}
>> +
>> +define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value
seq_cst
>> + ret i64 %0
>> +}
>> +
>> +define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
>> + ret i64 %0
>> +}
>> +
>> +define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value
seq_cst
>> + ret i64 %0
>> +}
>> +
>> +define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
>> + ret i64 %0
>> +}
>> +
>> +define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value
seq_cst
>> + ret i64 %0
>> +}
>> +
>> +define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
>> + ret i64 %0
>> +}
>> +
>> +define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> +entry:
>> + %0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value
seq_cst
>> + ret i64 %0
>> +}
>> diff --git a/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll
b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll
>> index ec14179..509d535 100644
>> --- a/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll
>> +++ b/amdgcn/lib/cl_khr_int64_extended_atomics/minmax_helpers.ll
>> @@ -1,4 +1,4 @@
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>>
>> define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)*
nocapture %ptr, i64 %value) nounwind alwaysinline {
>> entry:
>> diff --git a/amdgcn/lib/workitem/get_global_size.40.ll
b/amdgcn/lib/workitem/get_global_size.40.ll
>> new file mode 100644
>> index 0000000..3d26d2f
>> --- /dev/null
>> +++ b/amdgcn/lib/workitem/get_global_size.40.ll
>> @@ -0,0 +1,23 @@
>> +declare i32 @llvm.r600.read.global.size.x() nounwind readnone
>> +declare i32 @llvm.r600.read.global.size.y() nounwind readnone
>> +declare i32 @llvm.r600.read.global.size.z() nounwind readnone
>> +
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +
>> +define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
>> + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label
%y_dim i32 2, label %z_dim]
>> +x_dim:
>> + %x = call i32 @llvm.r600.read.global.size.x()
>> + %x.ext = zext i32 %x to i64
>> + ret i64 %x.ext
>> +y_dim:
>> + %y = call i32 @llvm.r600.read.global.size.y()
>> + %y.ext = zext i32 %y to i64
>> + ret i64 %y.ext
>> +z_dim:
>> + %z = call i32 @llvm.r600.read.global.size.z()
>> + %z.ext = zext i32 %z to i64
>> + ret i64 %z.ext
>> +default:
>> + ret i64 1
>> +}
>> diff --git a/amdgcn/lib/workitem/get_global_size.ll
b/amdgcn/lib/workitem/get_global_size.ll
>> index 3d26d2f..3292af5 100644
>> --- a/amdgcn/lib/workitem/get_global_size.ll
>> +++ b/amdgcn/lib/workitem/get_global_size.ll
>> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.global.size.x() nounwind
readnone
>> declare i32 @llvm.r600.read.global.size.y() nounwind readnone
>> declare i32 @llvm.r600.read.global.size.z() nounwind readnone
>>
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>>
>> define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
>> switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label
%y_dim i32 2, label %z_dim]
>> diff --git a/amdgcn/lib/workitem/get_local_size.40.ll
b/amdgcn/lib/workitem/get_local_size.40.ll
>> new file mode 100644
>> index 0000000..36141f9
>> --- /dev/null
>> +++ b/amdgcn/lib/workitem/get_local_size.40.ll
>> @@ -0,0 +1,23 @@
>> +declare i32 @llvm.r600.read.local.size.x() nounwind readnone
>> +declare i32 @llvm.r600.read.local.size.y() nounwind readnone
>> +declare i32 @llvm.r600.read.local.size.z() nounwind readnone
>> +
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +
>> +define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
>> + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label
%y_dim i32 2, label %z_dim]
>> +x_dim:
>> + %x = call i32 @llvm.r600.read.local.size.x()
>> + %x.ext = zext i32 %x to i64
>> + ret i64 %x.ext
>> +y_dim:
>> + %y = call i32 @llvm.r600.read.local.size.y()
>> + %y.ext = zext i32 %y to i64
>> + ret i64 %y.ext
>> +z_dim:
>> + %z = call i32 @llvm.r600.read.local.size.z()
>> + %z.ext = zext i32 %z to i64
>> + ret i64 %z.ext
>> +default:
>> + ret i64 1
>> +}
>> diff --git a/amdgcn/lib/workitem/get_local_size.ll
b/amdgcn/lib/workitem/get_local_size.ll
>> index 36141f9..1b5fec1 100644
>> --- a/amdgcn/lib/workitem/get_local_size.ll
>> +++ b/amdgcn/lib/workitem/get_local_size.ll
>> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.local.size.x() nounwind
readnone
>> declare i32 @llvm.r600.read.local.size.y() nounwind readnone
>> declare i32 @llvm.r600.read.local.size.z() nounwind readnone
>>
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>>
>> define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
>> switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label
%y_dim i32 2, label %z_dim]
>> diff --git a/amdgcn/lib/workitem/get_num_groups.40.ll
b/amdgcn/lib/workitem/get_num_groups.40.ll
>> new file mode 100644
>> index 0000000..12ec8ea
>> --- /dev/null
>> +++ b/amdgcn/lib/workitem/get_num_groups.40.ll
>> @@ -0,0 +1,23 @@
>> +declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
>> +declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
>> +declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
>> +
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +
>> +define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
>> + switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label
%y_dim i32 2, label %z_dim]
>> +x_dim:
>> + %x = call i32 @llvm.r600.read.ngroups.x()
>> + %x.ext = zext i32 %x to i64
>> + ret i64 %x.ext
>> +y_dim:
>> + %y = call i32 @llvm.r600.read.ngroups.y()
>> + %y.ext = zext i32 %y to i64
>> + ret i64 %y.ext
>> +z_dim:
>> + %z = call i32 @llvm.r600.read.ngroups.z()
>> + %z.ext = zext i32 %z to i64
>> + ret i64 %z.ext
>> +default:
>> + ret i64 1
>> +}
>> diff --git a/amdgcn/lib/workitem/get_num_groups.ll
b/amdgcn/lib/workitem/get_num_groups.ll
>> index 12ec8ea..8fb55ed 100644
>> --- a/amdgcn/lib/workitem/get_num_groups.ll
>> +++ b/amdgcn/lib/workitem/get_num_groups.ll
>> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
>> declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
>> declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
>>
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
>>
>> define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
>> switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label
%y_dim i32 2, label %z_dim]
>
> ping
I've at least verified that this series fixes the build for me, but I haven't
had a chance to run the applicable CTS tests yet.
If I don't get back to you in the next 24 hours, you can at least put my
Acked-by on this series.
--Aaron
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20180221/f51ef6d5/attachment-0001.html>
More information about the Libclc-dev mailing list