<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/57382>57382</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Slightly suboptimal encoding of sign-extend -> subtract in AArch64 by LLVM
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
dead-claudia
</td>
</tr>
</table>
<pre>
Link: https://godbolt.org/z/qzjreh383 (found this while working on https://github.com/llvm/llvm-project/issues/57381, hence the names)
LLVM IR:
```llvm
; returns (1000 - %0) & (signed(%0) >> 31)
define dso_local i32 @sub_imm_mask_2(i16 noundef %0) local_unnamed_addr #0 {
%2 = sext i16 %0 to i32
%3 = sub nsw i32 1000, %2
%4 = icmp slt i16 %0, 0
%5 = select i1 %4, i32 %3, i32 0
ret i32 %5
}
; returns (%0 - %1) & (signed(%1) >> 31)
define dso_local i32 @sub_reg_mask_2(i32 noundef %0, i16 noundef %1) local_unnamed_addr #0 {
%3 = sext i16 %1 to i32
%4 = sub nsw i32 %0, %3
%5 = icmp slt i16 %1, 0
%6 = select i1 %5, i32 %4, i32 0
ret i32 %6
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" }
```
This currently compiles to the following:
```s
sub_imm_mask_2:
mov w8, #1000
sxth w9, w0
sub w8, w8, w9
and w0, w8, w9, asr #31
ret
sub_reg_mask_2:
sxth w8, w1
sub w9, w0, w8
and w0, w9, w8, asr #31
ret
```
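This could instead be emitted as the following, folding the sign extension into the `sub` through its extended-register form (`sub ..., sxth`) so that the `sub` no longer has to wait for the `sxth` result before it can start. That would give a minor perf boost at zero cost to instruction count: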
```s
sub_imm_mask_2:
mov w8, #1000
sxth w9, w0
sub w8, w8, w0, sxth
and w0, w8, w9, asr #31
ret
sub_reg_mask_2:
sxth w8, w1
sub w9, w0, w1, sxth
and w0, w9, w8, asr #31
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzNVsly4zYQ_RrqgqKLi0hJBx6scXJyTknlqgLJJokxCCgAaI399XkAJVmSx84kuYxLJrE89PYa3ax1-1I9CvUU5fdscG5vMYiyX_HrdVtr6e606TF7xf9fr18NDfk6Z1G27vSkWuYGYdlhEJLYQZsnoXqm1a0g4Yapvmv0iImUz6dXvDf6KzUOU2HtRBaDYpWv0yj7wgZSDUE8McVHv7WJkocouZ-fW-Ea3ZJXcbEalcn8C0rmpXzLDLnJKOuNTpMkYTFGRQKJeJd-1YpeUYvBeT3_BT-Wp2e1LXVCEWut3kndcMlEnrFomdip3olx3I3cPu0yiBBpyZQPDXVnNeHEblLek3bH29ZgK09YtNrO0pmHQl7-wCx9c8wL8YeZ017RBSifQVPNlD0EI7xLPmBewgVwGYCiGffMyjeJHplcwIqjUgkeAAonPSa4B3Wn8fkMgnnaLI4hXj1ckXAd8eBGiHj6QcTTfx9xQ_1FxLF-HfEv7IaF9MdZyN-xkL5nYfmOhZPiELPb8N6ykN6yUL5nobhgYfkZC-V3WeDOGVFPjuzRSciHo2ycrMO16w1Ziwh1hnDBtKFmMtaP7ItqQuQOArfbEG-VBgsHIeVMKpsOjte47lGWdQahjPdaKEcGcyjBEwdiSbzD0ING4ac9l_EzvNMmPojWDWd0coQpHTvD93tUkHjkFwBnJjpirOPNk68abpZUT11HJrbilc7w9RHruOnJxc1-Om_1pMiI5hrQEYdbvsAcUVG2VaQVQo7R85oH-Dm4pwpzGes_fA1EAA0pJ18Y6twe9dD6tPH1q9NSaoSz_6ha2Xl-U0pOYHb8G_VzeB_Wc57l4eJfQew3NwTIxkMOt7tI1zcBx-fmGsNBesAkVxg8uQ3XBRf06gCS4s36i2t5a_3ZtFlk-oFpJ8Nn5Z-Ztnkz8J9M-4Q0PcmW1QRsjH6DjtJC2jVroVzNTHquphpP1mqySJGVY4pwBkwfuHC-9Z1w8NcDsYOsNc4XIIyRcYwzXAlt2J5Mx2qtLZYceyWjYQ8mvtooi7RvnIBA2Kjcz5g6gYfg50-bQukPmfjfU2lBVVqWaZGkm9Vy0VZ5u8k3fOGEk1T9LkU_-IIA0_TeiRE9LCRZ-EjqmO-BMfoMwZrYNz_gUAJ9B1Ds_t40Q7lk9Qt7fPzzt8VkZPX_vqqyxVAtEyrXtCrLJXwsi64pMF8WadGmecqLzULymqStomLrSzKht3kRvioWDwtRZQkK9jork3W6yZd3-aauC5zgzaprqOPozjRyIe-8Hf67cWGqYFI99RabUlhn3za5DZ8BFNRBPp_coE3VounEjeRTK_gi6K-C_X8DHdD1kw">