<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/60603">60603</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[RISCV] Correctness issue with +zve32x and float vector instructions
</td>
</tr>
<tr>
<th>Labels</th>
<td>
bug,
backend:RISC-V
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
dcaballe
</td>
</tr>
</table>
<pre>
The following function produces the wrong output when compiled with `+zve32x`. It works with `+v` or `+zve32f`.
```
target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
target triple = "riscv32-unknown-unknown-eabi-elf"
%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr }
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 }
define i32 @_iota_dim0_dispatch_0_generic_2x3(ptr noalias nocapture nonnull readnone align 16 %0, ptr noalias nocapture nonnull readonly align 16 %1, ptr noalias nocapture nonnull readonly align 16 %2) local_unnamed_addr #0 {
%.elt19 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t, ptr %1, i32 0, i32 10
%.unpack20 = load ptr, ptr %.elt19, align 4
%4 = load ptr, ptr %.unpack20, align 8
%splitgep = getelementptr i8, ptr %4, i32 64
%.elt23 = getelementptr inbounds %iree_hal_executable_workgroup_state_v0_t, ptr %2, i32 0, i32 1
%.unpack24 = load i32, ptr %.elt23, align 4
%5 = shl i32 %.unpack24, 1
%6 = insertelement <3 x i32> undef, i32 %5, i64 0
%7 = sitofp <3 x i32> %6 to <3 x float>
%8 = shufflevector <3 x float> %7, <3 x float> poison, <3 x i32> zeroinitializer
store <3 x float> %8, ptr %splitgep, align 64
%9 = add <3 x i32> %6, <i32 1, i32 undef, i32 undef>
%10 = sitofp <3 x i32> %9 to <3 x float>
%11 = shufflevector <3 x float> %10, <3 x float> poison, <3 x i32> zeroinitializer
%12 = getelementptr i8, ptr %4, i32 76
store <3 x float> %11, ptr %12, align 4
ret i32 0
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" }
```
llc repro.ll -O3 -mattr=+m,+a,+f,+zvl512b,+zve32x -mtriple=riscv32 -target-abi=ilp32:
```
_iota_dim0_dispatch_0_generic_2x3: # @_iota_dim0_dispatch_0_generic_2x3
# %bb.0:
addi sp, sp, -16
sw ra, 12(sp) # 4-byte Folded Spill
sw s0, 8(sp) # 4-byte Folded Spill
addi s0, sp, 16
lw a0, 28(a1)
lw a1, 4(a2)
lw a0, 0(a0)
slli a1, a1, 1
fcvt.s.w ft0, a1
fsw ft0, 72(a0)
fsw ft0, 68(a0)
fsw ft0, 64(a0)
addi a1, a1, 1
fcvt.s.w ft0, a1
fsw ft0, 84(a0)
fsw ft0, 80(a0)
fsw ft0, 76(a0)
li a0, 0
addi sp, s0, -16
lw ra, 12(sp) # 4-byte Folded Reload
lw s0, 8(sp) # 4-byte Folded Reload
addi sp, sp, 16
ret
```
llc repro.ll -O3 -mattr=+m,+a,+f,+zvl512b,+v -mtriple=riscv64 -target-abi=lp64d:
```
_iota_dim0_dispatch_0_generic_2x3: # @_iota_dim0_dispatch_0_generic_2x3
# %bb.0:
addi sp, sp, -16
sd ra, 8(sp) # 8-byte Folded Spill
sd s0, 0(sp) # 8-byte Folded Spill
addi s0, sp, 16
lwu a0, 36(a1)
lwu a1, 32(a1)
slli a0, a0, 32
or a0, a0, a1
ld a0, 0(a0)
lw a1, 4(a2)
addi a2, a0, 64
slli a1, a1, 1
vsetivli zero, 4, e32, mf2, ta, ma
vmv.v.x v8, a1
vfcvt.f.x.v v9, v8
vrgather.vi v10, v9, 0
addi a1, a0, 72
vsetivli zero, 1, e32, mf2, ta, ma
vse32.v v9, (a1)
vse64.v v10, (a2)
vsetivli zero, 4, e32, mf2, ta, ma
vadd.vi v8, v8, 1
vfcvt.f.x.v v8, v8
vrgather.vi v9, v8, 0
addi a1, a0, 84
vsetivli zero, 1, e64, m1, ta, ma
vse32.v v8, (a1)
vmv.x.s a1, v9
sw a1, 76(a0)
srli a1, a1, 32
sw a1, 80(a0)
li a0, 0
addi sp, s0, -16
ld ra, 8(sp) # 8-byte Folded Reload
ld s0, 0(sp) # 8-byte Folded Reload
addi sp, sp, 16
ret
```
The `+zve32x` version produces the output `[0, 0, 1]`, which is not correct, and the `+v` (or `+zve32f`) version produces `[0, 1, 2]`, which is correct. I think this is another problem related to dealing with float vectors in `+zve32x` mode.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzUWM1v66oS_2vIBjnC2HGcRRanza10Vk-65-puI2zGCe9gsAA7bf_6J_BHnI-XVu19i1dV2PEwXz-GmQFmrTgogC1aPaHVbsFad9Rmy0tWMClhUWj-tv3rCLjSUuqTUAdctap0QivcGM3bEix2R8Ano9UB69Y1rcOnIyhc6roREjg-CXfEKCOIPr13kNBXlJEl_unwSZvfdk7uUEawNvPJlZ-MyA6RH8OYkeE__HTMHMBhzhyT7E23DqNkhxGlENUo-QFRg5IfCQ1DJLIUJT-yNFIJjX7FNEeUXohxRjQSRhFG2LJLaNSq30qf1PQEVogIZDVxDyNdCQOwPzK5h1coW8cKCXsubMNcedxbxxzsO7LvbXRvDWC0fsIioYg-T484mz_u0vLz2Dhz-cBovXtoj0f9YHTbfMegQZ9I6ExfGDlUQkFPScleaMf2XNTkjAPZH0CBEeWeviaI5t5opZkUzGKlS9a41gBWWqlWSmyAcaUVYCbFQeE4w4iuyOjsYz6t5NsFX_xFPoroBktdMrlvlWI18D3j3GBEE-Ih613HfuoSpIs3AdADOJBQg3JepVCFbhW3-PNxMi3pYLkHlYwv8bADgtJWNaz8TUnQKzXjFxExWuU_9G6lM4vT_8o0Sj3z5TM-20jhDtDc8zWfiUlHi7N0ZjFIR5MvwHQvfGfK6C1O88UZPJq5fA7os1n3cVoFJnuUfXDPpPn5czVZmCmUBTO6hlHynODXoC75A7eKQzXtILpahfcsxWQmZt0rFE5XzRV_UOL0-LWSmjmU_DFjzgdr26qS0EHpfF69nB10eMXX3xstrFYzyqD1HYwWSjjBpHgHM2qzThu4J3weBWO0nLHN5uD2O4ZxfsfRwZB-MQfMLgDsf1y4H5OH4G0egxfHn0MvJv8AfF4O_fQmWmcfwh7H88RB70SzATdskj5zX6Zw5pwRRevADgnOl8P1E65b6xqjDwasz5yVAZ82DZStsf7NvqkSK92qk1C-7EtpwLVG4Rpqbd4QzX1yPRnhIHijWFmCtaLwPvtq7ZMw-FSLKK0MqyFqtFAOjK-0yQ5RyqRE1O8YetRueFM6KlohnVB2-qIke38rhOLhy-TeZe_Qj1KW2EBj9FJKHP0rwVHtAQj6nmpEnxF9Yv2j6h_vnVzFtBh_-JYGR3XfO6BkN7QNOOqbiogVAiU7IZvQhty15OMymfzA4Q_5pPmZujq0AImPgaJYkrNqsmGcC0Q2NmzGfozibCLbEyIbw0JWo4jmfsYG3_3zCtKoeHOAX7TkwPGvRkh5KcqGXZI_lvShqNFocjZ6ZrP0ilig-bYuZzGimytq2BapJ9JbYmAlnkjmRCulmHj7MZ6IVdm5pV16_sqRYcZEtLPva3oj-YKe5R_Q0xv6gMd3LctvJV_SbzG59Cy7ofeIDYDejzlyHXPyOzH3J_hSfinrq0F3JevOVplZbcD97zJLd5NUsvQqqcgmS_n_SVLh0wJ_ZlHyR0mFT-tLvinqo6TSTpGcZHeySjttwITekMfU0W8_Mswaqdpc0WYbVPIHGelhLhuTAj3LHdusj5JZZ8GJLkzwjcog_xlD3yDXVXi4sII1O7PV3bJbvvq3_MqNLmShavm67PyvcPzo8jPZHJg7gll2XmfX91L9rJu0MZg85tIHNsefsdlCQmdG3SxdZyFL-wlDi3cN9FfRYpwP_uYDHJeLcAlZ_hiyzVnEQ8Ty9GPEsmB-HX8EWH4fsLpbvi7tpLfbXLYA_dc7tcKam5CcbZIZ75069KU68500dF1nvpOH_sE689cRru_XcAfG3lzSDddzfu7qabTbB8BqF_if8ekoyiMWvr13uNTGQBlO10zxIGF2T4do7g9Fs3s67_6kb6YkrB-9o2RQsMQ_sTsK9duP1hOY0j7MvTR_OMAGJHPA_aGNA5NCHfprw3Dq
wf35zGKh8NCN15rDcGu44NuEb5INW8A2ztarPF2lhC6O2zRLcpYnaUYKulqvkopxmsE6Z-mqjDnPFmJLCU0IJTnJkyylS7Le5JuMl9k6peuMlSglUDMhl1J29VKbw0JY28I2IxlJFpIVIG24WKW0aA_-DOPrOi1Y-RuUL9t__vz1HP3tCavdwmy9mKhoDxalRArr7FmwE06GS1rP8jda7fBzj53yZ7GgdrhInU4kfsnm-GChrDNtuLu1i9bI7dG5xvoSTl8QfTkId2yLZalrRF-84uERNUb_O4TBS9BjEX0JHv4nAAD__xwTI5c">