<br><span>
</span><br><span>
On Mon, Feb 19, 2018 at 3:22 PM, Jan Vesely via Libclc-dev <</span><a href="mailto:libclc-dev@lists.llvm.org" target="_blank">libclc-dev@lists.llvm.org</a><span>> wrote:</span><br><span>
> On Tue, 2018-02-13 at 20:30 -0500, Jan Vesely wrote:</span><br><span>
>> r324101 switched around AS numbering</span><br><span>
>></span><br><span>
>> Signed-off-by: Jan Vesely <</span><a href="mailto:jan.vesely@rutgers.edu" target="_blank">jan.vesely@rutgers.edu</a><span>></span><br><span>
>> ---</span><br><span>
>>  amdgcn-amdhsa/lib/OVERRIDES                        |  3 ++</span><br><span>
>>  amdgcn/lib/OVERRIDES_3.9                           |  1 +</span><br><span>
>>  amdgcn/lib/OVERRIDES_4.0                           |  4 ++</span><br><span>
>>  amdgcn/lib/OVERRIDES_5.0                           |  4 ++</span><br><span>
>>  amdgcn/lib/OVERRIDES_6.0                           |  4 ++</span><br><span>
>>  amdgcn/lib/SOURCES                                 |  1 -</span><br><span>
>>  amdgcn/lib/SOURCES_3.9                             |  2 +</span><br><span>
>>  amdgcn/lib/SOURCES_4.0                             |  5 +++</span><br><span>
>>  amdgcn/lib/SOURCES_5.0                             |  4 ++</span><br><span>
>>  amdgcn/lib/SOURCES_6.0                             |  4 ++</span><br><span>
>>  .../minmax_helpers.39.ll                           | 49 ++++++++++++++++++++++</span><br><span>
>>  .../minmax_helpers.ll                              |  2 +-</span><br><span>
>>  amdgcn/lib/workitem/get_global</span><span>_size.40.ll          | 23 ++++++++++</span><br><span>
>>  amdgcn/lib/workitem/get_global</span><span>_size.ll             |  2 +-</span><br><span>
>>  amdgcn/lib/workitem/get_local_</span><span>size.40.ll           | 23 ++++++++++</span><br><span>
>>  amdgcn/lib/workitem/get_local_</span><span>size.ll              |  2 +-</span><br><span>
>>  amdgcn/lib/workitem/get_num_gr</span><span>oups.40.ll           | 23 ++++++++++</span><br><span>
>>  amdgcn/lib/workitem/get_num_gr</span><span>oups.ll              |  2 +-</span><br><span>
>>  18 files changed, 153 insertions(+), 5 deletions(-)</span><br><span>
>>  create mode 100644 amdgcn/lib/OVERRIDES_4.0</span><br><span>
>>  create mode 100644 amdgcn/lib/OVERRIDES_5.0</span><br><span>
>>  create mode 100644 amdgcn/lib/OVERRIDES_6.0</span><br><span>
>>  create mode 100644 amdgcn/lib/SOURCES_4.0</span><br><span>
>>  create mode 100644 amdgcn/lib/SOURCES_5.0</span><br><span>
>>  create mode 100644 amdgcn/lib/SOURCES_6.0</span><br><span>
>>  create mode 100644 amdgcn/lib/cl_khr_int64_extend</span><span>ed_atomics/minmax_helpers.39.</span><span>ll</span><br><span>
>>  create mode 100644 amdgcn/lib/workitem/get_global</span><span>_size.40.ll</span><br><span>
>>  create mode 100644 amdgcn/lib/workitem/get_local_</span><span>size.40.ll</span><br><span>
>>  create mode 100644 amdgcn/lib/workitem/get_num_gr</span><span>oups.40.ll</span><br><span>
>></span><br><span>
>> diff --git a/amdgcn-amdhsa/lib/OVERRIDES b/amdgcn-amdhsa/lib/OVERRIDES</span><br><span>
>> index c9bd69b..18ce3f8 100644</span><br><span>
>> --- a/amdgcn-amdhsa/lib/OVERRIDES</span><br><span>
>> +++ b/amdgcn-amdhsa/lib/OVERRIDES</span><br><span>
>> @@ -1 +1,4 @@</span><br><span>
>>  workitem/get_num_groups.ll</span><br><span>
>> +workitem/get_num_groups.40.ll</span><br><span>
>> +workitem/get_global_size.40.l</span><span>l</span><br><span>
>> +workitem/get_local_size.40.ll</span><br><span>
>> diff --git a/amdgcn/lib/OVERRIDES_3.9 b/amdgcn/lib/OVERRIDES_3.9</span><br><span>
>> index f26fbbe..3268f67 100644</span><br><span>
>> --- a/amdgcn/lib/OVERRIDES_3.9</span><br><span>
>> +++ b/amdgcn/lib/OVERRIDES_3.9</span><br><span>
>> @@ -1,3 +1,4 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.ll</span><br><span>
>>  workitem/get_global_size.ll</span><br><span>
>>  workitem/get_local_size.ll</span><br><span>
>>  workitem/get_num_groups.ll</span><br><span>
>> diff --git a/amdgcn/lib/OVERRIDES_4.0 b/amdgcn/lib/OVERRIDES_4.0</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..3268f67</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/OVERRIDES_4.0</span><br><span>
>> @@ -0,0 +1,4 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.ll</span><br><span>
>> +workitem/get_global_size.ll</span><br><span>
>> +workitem/get_local_size.ll</span><br><span>
>> +workitem/get_num_groups.ll</span><br><span>
>> diff --git a/amdgcn/lib/OVERRIDES_5.0 b/amdgcn/lib/OVERRIDES_5.0</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..3268f67</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/OVERRIDES_5.0</span><br><span>
>> @@ -0,0 +1,4 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.ll</span><br><span>
>> +workitem/get_global_size.ll</span><br><span>
>> +workitem/get_local_size.ll</span><br><span>
>> +workitem/get_num_groups.ll</span><br><span>
>> diff --git a/amdgcn/lib/OVERRIDES_6.0 b/amdgcn/lib/OVERRIDES_6.0</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..3268f67</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/OVERRIDES_6.0</span><br><span>
>> @@ -0,0 +1,4 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.ll</span><br><span>
>> +workitem/get_global_size.ll</span><br><span>
>> +workitem/get_local_size.ll</span><br><span>
>> +workitem/get_num_groups.ll</span><br><span>
>> diff --git a/amdgcn/lib/SOURCES b/amdgcn/lib/SOURCES</span><br><span>
>> index 8c177bd..8e14ce2 100644</span><br><span>
>> --- a/amdgcn/lib/SOURCES</span><br><span>
>> +++ b/amdgcn/lib/SOURCES</span><br><span>
>> @@ -1,7 +1,6 @@</span><br><span>
>>  cl_khr_int64_extended_atomics/</span><span>minmax_helpers.ll</span><br><span>
>>  math/</span><a href="http://ldexp.cl" rel="noreferrer" target="_blank">ldexp.cl</a><br><span>
>>  mem_fence/</span><a href="http://fence.cl" rel="noreferrer" target="_blank">fence.cl</a><br><span>
>> -mem_fence/waitcnt.ll</span><br><span>
>>  synchronization/</span><a href="http://barrier.cl" rel="noreferrer" target="_blank">barrier.cl</a><br><span>
>>  workitem/</span><a href="http://get_global_offset.cl" rel="noreferrer" target="_blank">get_global_offset.cl</a><br><span>
>>  workitem/</span><a href="http://get_group_id.cl" rel="noreferrer" target="_blank">get_group_id.cl</a><br><span>
>> diff --git a/amdgcn/lib/SOURCES_3.9 b/amdgcn/lib/SOURCES_3.9</span><br><span>
>> index 6ee7cb8..3cecdb0 100644</span><br><span>
>> --- a/amdgcn/lib/SOURCES_3.9</span><br><span>
>> +++ b/amdgcn/lib/SOURCES_3.9</span><br><span>
>> @@ -1,3 +1,5 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.39.ll</span><br><span>
>> +mem_fence/waitcnt.ll</span><br><span>
>>  workitem/get_global_size.39.ll</span><br><span>
>>  workitem/get_local_size.39.ll</span><br><span>
>>  workitem/get_num_groups.39.ll</span><br><span>
>> diff --git a/amdgcn/lib/SOURCES_4.0 b/amdgcn/lib/SOURCES_4.0</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..5ed1d7c</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/SOURCES_4.0</span><br><span>
>> @@ -0,0 +1,5 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.39.ll</span><br><span>
>> +mem_fence/waitcnt.ll</span><br><span>
>> +workitem/get_global_size.40.l</span><span>l</span><br><span>
>> +workitem/get_local_size.40.ll</span><br><span>
>> +workitem/get_num_groups.40.ll</span><br><span>
>> diff --git a/amdgcn/lib/SOURCES_5.0 b/amdgcn/lib/SOURCES_5.0</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..45c51ec</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/SOURCES_5.0</span><br><span>
>> @@ -0,0 +1,4 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.39.ll</span><br><span>
>> +workitem/get_global_size.40.l</span><span>l</span><br><span>
>> +workitem/get_local_size.40.ll</span><br><span>
>> +workitem/get_num_groups.40.ll</span><br><span>
>> diff --git a/amdgcn/lib/SOURCES_6.0 b/amdgcn/lib/SOURCES_6.0</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..45c51ec</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/SOURCES_6.0</span><br><span>
>> @@ -0,0 +1,4 @@</span><br><span>
>> +cl_khr_int64_extended_atomics</span><span>/minmax_helpers.39.ll</span><br><span>
>> +workitem/get_global_size.40.l</span><span>l</span><br><span>
>> +workitem/get_local_size.40.ll</span><br><span>
>> +workitem/get_num_groups.40.ll</span><br><span>
>> diff --git a/amdgcn/lib/cl_khr_int64_exte</span><span>nded_atomics/minmax_helpers.</span><span>39.ll b/amdgcn/lib/cl_khr_int64_exte</span><span>nded_atomics/minmax_helpers.</span><span>39.ll</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..ec14179</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/cl_khr_int64_exte</span><span>nded_atomics/minmax_helpers.</span><span>39.ll</span><br><span>
>> @@ -0,0 +1,49 @@</span><br><span>
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_min_glo</span><span>bal_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_umin_gl</span><span>obal_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_min_loc</span><span>al_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_umin_lo</span><span>cal_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_max_glo</span><span>bal_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_umax_gl</span><span>obal_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_max_loc</span><span>al_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> +</span><br><span>
>> +define i64 @__clc__sync_fetch_and_umax_lo</span><span>cal_8(i64 addrspace(3)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>> +entry:</span><br><span>
>> +  %0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst</span><br><span>
>> +  ret i64 %0</span><br><span>
>> +}</span><br><span>
>> diff --git a/amdgcn/lib/cl_khr_int64_exte</span><span>nded_atomics/minmax_helpers.ll b/amdgcn/lib/cl_khr_int64_exte</span><span>nded_atomics/minmax_helpers.ll</span><br><span>
>> index ec14179..509d535 100644</span><br><span>
>> --- a/amdgcn/lib/cl_khr_int64_exte</span><span>nded_atomics/minmax_helpers.ll</span><br><span>
>> +++ b/amdgcn/lib/cl_khr_int64_exte</span><span>nded_atomics/minmax_helpers.ll</span><br><span>
>> @@ -1,4 +1,4 @@</span><br><span>
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:32:32-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64-A5"</span><br><span>
>></span><br><span>
>>  define i64 @__clc__sync_fetch_and_min_glo</span><span>bal_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {</span><br><span>
>>  entry:</span><br><span>
>> diff --git a/amdgcn/lib/workitem/get_glob</span><span>al_size.40.ll b/amdgcn/lib/workitem/get_glob</span><span>al_size.40.ll</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..3d26d2f</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/workitem/get_glob</span><span>al_size.40.ll</span><br><span>
>> @@ -0,0 +1,23 @@</span><br><span>
>> +declare i32 @llvm.r600.read.global.size.x(</span><span>) nounwind readnone</span><br><span>
>> +declare i32 @llvm.r600.read.global.size.y(</span><span>) nounwind readnone</span><br><span>
>> +declare i32 @llvm.r600.read.global.size.z(</span><span>) nounwind readnone</span><br><span>
>> +</span><br><span>
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +</span><br><span>
>> +define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {</span><br><span>
>> +  switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]</span><br><span>
>> +x_dim:</span><br><span>
>> +  %x = call i32 @llvm.r600.read.global.size.x(</span><span>)</span><br><span>
>> +  %x.ext = zext i32 %x to i64</span><br><span>
>> +  ret i64 %x.ext</span><br><span>
>> +y_dim:</span><br><span>
>> +  %y = call i32 @llvm.r600.read.global.size.y(</span><span>)</span><br><span>
>> +  %y.ext = zext i32 %y to i64</span><br><span>
>> +  ret i64 %y.ext</span><br><span>
>> +z_dim:</span><br><span>
>> +  %z = call i32 @llvm.r600.read.global.size.z(</span><span>)</span><br><span>
>> +  %z.ext = zext i32 %z to i64</span><br><span>
>> +  ret i64 %z.ext</span><br><span>
>> +default:</span><br><span>
>> +  ret i64 1</span><br><span>
>> +}</span><br><span>
>> diff --git a/amdgcn/lib/workitem/get_glob</span><span>al_size.ll b/amdgcn/lib/workitem/get_glob</span><span>al_size.ll</span><br><span>
>> index 3d26d2f..3292af5 100644</span><br><span>
>> --- a/amdgcn/lib/workitem/get_glob</span><span>al_size.ll</span><br><span>
>> +++ b/amdgcn/lib/workitem/get_glob</span><span>al_size.ll</span><br><span>
>> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.global.size.x(</span><span>) nounwind readnone</span><br><span>
>>  declare i32 @llvm.r600.read.global.size.y(</span><span>) nounwind readnone</span><br><span>
>>  declare i32 @llvm.r600.read.global.size.z(</span><span>) nounwind readnone</span><br><span>
>></span><br><span>
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:32:32-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64-A5"</span><br><span>
>></span><br><span>
>>  define i64 @get_global_size(i32 %dim) nounwind readnone alwaysinline {</span><br><span>
>>    switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]</span><br><span>
>> diff --git a/amdgcn/lib/workitem/get_loca</span><span>l_size.40.ll b/amdgcn/lib/workitem/get_loca</span><span>l_size.40.ll</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..36141f9</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/workitem/get_loca</span><span>l_size.40.ll</span><br><span>
>> @@ -0,0 +1,23 @@</span><br><span>
>> +declare i32 @llvm.r600.read.local.size.x() nounwind readnone</span><br><span>
>> +declare i32 @llvm.r600.read.local.size.y() nounwind readnone</span><br><span>
>> +declare i32 @llvm.r600.read.local.size.z() nounwind readnone</span><br><span>
>> +</span><br><span>
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +</span><br><span>
>> +define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {</span><br><span>
>> +  switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]</span><br><span>
>> +x_dim:</span><br><span>
>> +  %x = call i32 @llvm.r600.read.local.size.x()</span><br><span>
>> +  %x.ext = zext i32 %x to i64</span><br><span>
>> +  ret i64 %x.ext</span><br><span>
>> +y_dim:</span><br><span>
>> +  %y = call i32 @llvm.r600.read.local.size.y()</span><br><span>
>> +  %y.ext = zext i32 %y to i64</span><br><span>
>> +  ret i64 %y.ext</span><br><span>
>> +z_dim:</span><br><span>
>> +  %z = call i32 @llvm.r600.read.local.size.z()</span><br><span>
>> +  %z.ext = zext i32 %z to i64</span><br><span>
>> +  ret i64 %z.ext</span><br><span>
>> +default:</span><br><span>
>> +  ret i64 1</span><br><span>
>> +}</span><br><span>
>> diff --git a/amdgcn/lib/workitem/get_loca</span><span>l_size.ll b/amdgcn/lib/workitem/get_loca</span><span>l_size.ll</span><br><span>
>> index 36141f9..1b5fec1 100644</span><br><span>
>> --- a/amdgcn/lib/workitem/get_loca</span><span>l_size.ll</span><br><span>
>> +++ b/amdgcn/lib/workitem/get_loca</span><span>l_size.ll</span><br><span>
>> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.local.size.x() nounwind readnone</span><br><span>
>>  declare i32 @llvm.r600.read.local.size.y() nounwind readnone</span><br><span>
>>  declare i32 @llvm.r600.read.local.size.z() nounwind readnone</span><br><span>
>></span><br><span>
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:32:32-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64-A5"</span><br><span>
>></span><br><span>
>>  define i64 @get_local_size(i32 %dim) nounwind readnone alwaysinline {</span><br><span>
>>    switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]</span><br><span>
>> diff --git a/amdgcn/lib/workitem/get_num_</span><span>groups.40.ll b/amdgcn/lib/workitem/get_num_</span><span>groups.40.ll</span><br><span>
>> new file mode 100644</span><br><span>
>> index 0000000..12ec8ea</span><br><span>
>> --- /dev/null</span><br><span>
>> +++ b/amdgcn/lib/workitem/get_num_</span><span>groups.40.ll</span><br><span>
>> @@ -0,0 +1,23 @@</span><br><span>
>> +declare i32 @llvm.r600.read.ngroups.x() nounwind readnone</span><br><span>
>> +declare i32 @llvm.r600.read.ngroups.y() nounwind readnone</span><br><span>
>> +declare i32 @llvm.r600.read.ngroups.z() nounwind readnone</span><br><span>
>> +</span><br><span>
>> +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +</span><br><span>
>> +define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {</span><br><span>
>> +  switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]</span><br><span>
>> +x_dim:</span><br><span>
>> +  %x = call i32 @llvm.r600.read.ngroups.x()</span><br><span>
>> +  %x.ext = zext i32 %x to i64</span><br><span>
>> +  ret i64 %x.ext</span><br><span>
>> +y_dim:</span><br><span>
>> +  %y = call i32 @llvm.r600.read.ngroups.y()</span><br><span>
>> +  %y.ext = zext i32 %y to i64</span><br><span>
>> +  ret i64 %y.ext</span><br><span>
>> +z_dim:</span><br><span>
>> +  %z = call i32 @llvm.r600.read.ngroups.z()</span><br><span>
>> +  %z.ext = zext i32 %z to i64</span><br><span>
>> +  ret i64 %z.ext</span><br><span>
>> +default:</span><br><span>
>> +  ret i64 1</span><br><span>
>> +}</span><br><span>
>> diff --git a/amdgcn/lib/workitem/get_num_</span><span>groups.ll b/amdgcn/lib/workitem/get_num_</span><span>groups.ll</span><br><span>
>> index 12ec8ea..8fb55ed 100644</span><br><span>
>> --- a/amdgcn/lib/workitem/get_num_</span><span>groups.ll</span><br><span>
>> +++ b/amdgcn/lib/workitem/get_num_</span><span>groups.ll</span><br><span>
>> @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.ngroups.x() nounwind readnone</span><br><span>
>>  declare i32 @llvm.r600.read.ngroups.y() nounwind readnone</span><br><span>
>>  declare i32 @llvm.r600.read.ngroups.z() nounwind readnone</span><br><span>
>></span><br><span>
>> -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:64:64-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64"</span><br><span>
>> +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p</span><span>3:32:32-p4:32:32-p5:32:32-i64:</span><span>64-v16:16-v24:32-v32:32-v48:</span><span>64-v96:128-v192:256-v256:256-</span><span>v512:512-v1024:1024-v2048:2048</span><span>-n32:64-A5"</span><br><span>
>></span><br><span>
>>  define i64 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {</span><br><span>
>>    switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim]</span><br><span>
></span><br><span>
> ping</span><br><span>
</span><br><span>
I've at least verified that this series fixes the build for me, but I haven't had a chance to run the applicable CTS tests yet.</span><div><br></div><div>If I don't get back to you in the next 24 hours, you can at least put my Acked-by on this series.</div><div><br></div><div>--Aaron<br><span>
</span><br><span>
> ______________________________</span><span>_________________</span><br><span>
> Libclc-dev mailing list</span><br><span>
> </span><a href="mailto:Libclc-dev@lists.llvm.org" target="_blank">Libclc-dev@lists.llvm.org</a><br><span>
> </span><a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev</a><br><span>
></span><br><span>
</span></div>