<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/67574>67574</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
AMDGPU instruction selection failure
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
upsj
</td>
</tr>
</table>
<pre>
Compiling the following file
```
#include &lt;hip/hip_runtime.h&gt;
// Reproducer kernel, `int` overload (mangled as _Z15block_transposePKiS0_Pi
// in the error output). Each thread copies `block_size` elements from
// `blocks` to `out_blocks`, offset by a per-thread `rank` of 0 or 1.
// Kept minimal on purpose: the exact expression shapes are what is being
// reported, so do not simplify them.
__global__ void block_transpose(const int* __restrict__ blocks,
const int* __restrict__ block_ptrs,
int* __restrict__ out_blocks)
{
// Loop trip count: difference between the first two entries of block_ptrs.
const auto block_size = block_ptrs[1] - block_ptrs[0];
// Stride is the constant 1, so every multiplication by block_stride below
// is a multiplication by 1.
const auto block_stride = 1;
// Linear thread index within the block (x fastest, then y, then z),
// converted to unsigned and reduced modulo 2, so rank is 0 or 1.
// NOTE(review): presumably the `% 2` lets the optimizer narrow this
// arithmetic to 1 bit, which would match the `i1 = mul` node in the
// reported "Cannot select" error -- not confirmed from this file alone.
const auto rank =
unsigned(threadIdx.x +
blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z)) %
2;
for (int i = 0; i < block_size; ++i) {
// With block_stride == 1 the read index and the write index below are
// both i + rank; the two expressions only differ in which term is scaled.
auto val = blocks[i * block_stride + rank];
out_blocks[i + rank * block_stride] = val;
}
}
// Reproducer kernel, `short` overload (mangled as _Z15block_transposePKsPKiPs
// in the error output). Same body as the `int` overload, but `blocks` and
// `out_blocks` hold `short` elements; `block_ptrs` is still `const int*`.
// Kept minimal on purpose: the exact expression shapes are what is being
// reported, so do not simplify them.
__global__ void block_transpose(const short* __restrict__ blocks,
const int* __restrict__ block_ptrs,
short* __restrict__ out_blocks)
{
// Loop trip count: difference between the first two entries of block_ptrs.
const auto block_size = block_ptrs[1] - block_ptrs[0];
// Stride is the constant 1, so every multiplication by block_stride below
// is a multiplication by 1.
const auto block_stride = 1;
// Linear thread index within the block (x fastest, then y, then z),
// converted to unsigned and reduced modulo 2, so rank is 0 or 1.
// NOTE(review): presumably the `% 2` lets the optimizer narrow this
// arithmetic to 1 bit, which would match the `i1 = mul` node in the
// reported "Cannot select" error -- not confirmed from this file alone.
const auto rank =
unsigned(threadIdx.x +
blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z)) %
2;
for (int i = 0; i < block_size; ++i) {
// With block_stride == 1 the read index and the write index below are
// both i + rank; the two expressions only differ in which term is scaled.
auto val = blocks[i * block_stride + rank];
out_blocks[i + rank * block_stride] = val;
}
}
```
using `clang++ -x hip -O3 -c file.cpp` causes a fatal error when lowering to device code:
```
fatal error: error in backend: Cannot select: 0xc0314f0: i1 = mul # D:1 0xba424c0, 0xc0315d0
0xba424c0: i1 = truncate 0xc031a30
0xc031a30: i32,ch = load<(dereferenceable invariant load (s32) from %ir.13, addrspace 4)> 0xca6fa58, 0xba41ea0, undef:i64
0xba41ea0: i64 = add 0xba41d50, Constant:i64<4>
0xba41d50: i64 = AssertAlign<4> 0xba426f0
0xba426f0: i64,ch = CopyFromReg 0xca6fa58, Register:i64 %13
0xc031640: i64 = Register %13
0xc031020: i64 = Constant<4>
0xc030f40: i64 = undef
0xc0315d0: i1 = truncate # D:1 0xc031b10
0xc031b10: i32 = add # D:1 0xc031720, 0xba42680
0xc031720: i32 = MUL_U24 # D:1 0xba425a0, 0xc031db0
0xba425a0: i32 = AssertZext # D:1 0xba42530, ValueType:ch:i10
0xba42530: i32,ch = CopyFromReg # D:1 0xca6fa58, Register:i32 %11
0xc031b80: i32 = Register %11
0xc031db0: i32 = srl 0xc031a30, Constant:i32<16>
0xc031a30: i32,ch = load<(dereferenceable invariant load (s32) from %ir.13, addrspace 4)> 0xca6fa58, 0xba41ea0, undef:i64
0xba41ea0: i64 = add 0xba41d50, Constant:i64<4>
0xba41d50: i64 = AssertAlign<4> 0xba426f0
0xba426f0: i64,ch = CopyFromReg 0xca6fa58, Register:i64 %13
0xc031640: i64 = Register %13
0xc031020: i64 = Constant<4>
0xc030f40: i64 = undef
0xc031aa0: i32 = Constant<16>
0xba42680: i32 = AssertZext # D:1 0xba42610, ValueType:ch:i10
0xba42610: i32,ch = CopyFromReg # D:1 0xca6fa58, Register:i32 %10
0xc0313a0: i32 = Register %10
In function: _Z15block_transposePKiS0_Pi
clang-16: error: clang frontend command failed with exit code 70 (use -v to see invocation)
clang version 16.0.0 (https://github.com/llvm/llvm-project dee4bc4a4ecc56623d511ea571355d1e1ad02159)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/tribizel/llvm-project/build/bin
clang-16: note: diagnostic msg: Error generating preprocessed source(s).
```
When compiling only the `short` overload, the failing SelectionDAG dump in the error message is a bit more manageable:
```
fatal error: error in backend: Cannot select: 0x9cd80f0: i1 = mul # D:1 0x9cd8320, 0x9cd8550
0x9cd8320: i1 = truncate 0xa608960
0xa608960: i16,ch = CopyFromReg 0xacb1ac8, Register:i16 %2
0xa608810: i16 = Register %2
0x9cd8550: i1 = truncate # D:1 0xa608c00
0xa608c00: i32 = AssertZext # D:1 0x9cd88d0, ValueType:ch:i27
0x9cd88d0: i32,ch = CopyFromReg # D:1 0xacb1ac8, Register:i32 %3
0x9cd8470: i32 = Register %3
In function: _Z15block_transposePKsPKiPs
```
I bisected this down to commit dee4bc4a4ecc56623d511ea571355d1e1ad02159 (https://reviews.llvm.org/D134596).
cc @changpeng
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzsWFGP4jgS_jXmxQI5dhLggQcaltNob3WtuZ07aV-QYxfgHWNHtkPT8-tPdqCTAH3Te5qT9mFbLSD2V5_L5foqlXDv1d4ALFDxhIr1iDfhYN2iqf3vo8rK18XKHmulldnjcAC8s1rbl3i1UxoQWSOyRCW5_LeXlCkjdCMBI7Y6qBrRzUHVW9eYoI4wOSD20wXZ-9xu99pWXG-3-GSVxJW24us2OG58bT0gOhPW-ICVCYgu8XbrwAenRNhuW6xHdNVS4e_8fYdoWwf3cbJHNLYJ26tP88smp08dX-sAb4K9rOjVtxisdd-B4ilDxRqPh2MEFWvEnvqRe8wYnJItZ_aGv0E6br5GxHCjjUkJIRGdhYMDLj_J8-SMEX16JyBpwbU6JtASD-xeo12HeE2IbvpbjA-dY0SLITkd-LyzLtIqE7BKWyKIPaWfq14A41j0kj6pxDm9cTjt-cR1F-gYUJVcGgaNPqXY9EN9JekdbWv7dAnjDUk8urjOieuOA03X12xY_-8K8Afr_lwaeOzRXyr4SwUfVMHwBtL4eH9BJRGam327GTw-44Oq8fgfDI9FuvlMRF2jkmDBGw8ec7zjgWsMzlmHXw5gsLYv4NKdy2IJJyUACysBseXDdXv2iC0vRMrgiouvYGQcW3FjbMAeNIgQB8hZEJblOxIvVJZ2e2w0RpThNWLLDJNzxXOaC4Lo6gIvJLnGs5vt7INrjOABLmjOSBf9bijiGUV0JQ7JSlsuEVshOpPgYAcOjABeacDKnLhT3ISEiTnko-Ec75w9xqxTbpKx6B6X0vmaC8B5zEn2U1yPlztezFrvK55nwNNWGiNhh9hSlfnFvW46-lbmyS0u5WVCFsluFaXHTbiYslX-1g9c06vD94iW3oMLS6325mJ0CV65I7ea7CZagi5KK1u_bpw9fob9cG-fYa98ANe6FaOSsXupt-Ev84FnV9OBUYskdIDs9n6364QnuyFzG-M-YUydB5nST7cIq7K7nIlDbc68ncut1ZSSt2Om5Yzc-ZcQPZJfvvx9-4Xmd9le8F62y4o8ON6E6VG1x_sbnMM9G0ts_-K6gV9f66hfcYjnlL1z8MUDefQPfrDvxykQvaJFlr2XAtVs4P0gBbLBicXt95De6Z6Ib_TAKGKrrOxS488sd3wT9h-h_B-k__9zFfijteCB2ccLwwfLww0_H6qrx99Pr36YZh-VY5l9UI4d_MeIkQxEwfj7CrwgPxm8a4wIypoI3f6WFTdN9fPP6p9k-6xafOo3xjFAy64NSINROyaAkVjY45EbiXdcaZD4RYUDhrMKqbPAUxL11njA41PsOjwkPVrBkxPXPrjlPIHzyhqclRMySYaHEGof2xO6QXSzV-HQVBNhj4hutD5dv8a1s7-DCFgC5JXIeQ5CFGVJmSyyDHgxzVhRyAwyLgnNivnbur9yt4fUt5xn5bbMx435auyLGWtlmvN4b5oLLvWm-Ggl6IiurVfna0x94FqDXKsUnvhwb4-A6CY4ValvoG-8RHRTNUrL-K3MfaCNDTGFsFR8b6wPSuCj38eRn1IDtgcDjofYxtUOamcFeA8Se9s4EZ-K4uPF5GFD9-_YBIq39xfW6Nf0EgOVpH1kKQm2J3CpltJVmvv0GavYTFYq4KN1gI_c8H2qrD-waZwLOSP_tWmMCHa9H8eLoug1jdfZh00jL8lsXg4agOtQwpfvFUIuqoyLO_llZRQVHdaMyDjLrox3EqRDX4viu21LJBTkzus49P26FNeYyXfqEp0OXb-CP1qV3glLW5XYbc2L7Pn03drEPl6a_PPP6tk_fsfWkuBKeRABJA4H5bG0L6Z91vlYXbgvOQ5OCl78JAp4Yt0e0c06Y3kxL7tn-FbBAqOciAM3-xrMfiQXTM7ZnI9gkZXzYsZYRqejw4LPZlW123GYVTnQnFbT
AsRsxwSDilSsGKkFJZSROZ1mZV7Q6aQSlM8Iz2g5F-VO5igncORKv_k0Ut43sCinxTQfaV6B9unlJaUGXnCaRJSiYj1yi1SIqmbvUU608qHb2SiooGGx_GX9t-cvWBkfXJOO46LT-CvW-MbBqHF68cdKM6Kb5IhHdJMc_U8AAAD__3VT_MY">