<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/102062>102062</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[Aarch64] Missed optimization: subtraction with borrow
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
mratsim
</td>
</tr>
</table>
<pre>
https://alive2.llvm.org/ce/z/UVMJQX
The following IR supposedly ensures proper generation of subtraction with borrow on x86; the codegen for AArch64 is unfortunately not up to par.
https://github.com/llvm/llvm-project/blob/1b9d7dd9eb31974077b64d66404193bd7c4ad65e/llvm/test/CodeGen/X86/subcarry.ll#L586-L652
```llvm
; target triple = "x86_64-unknown-linux-gnu"
target triple = "aarch64-unknown-linux-gnu"
; Baseline: subtracts two i256 values with a single `sub` and returns the result.
; NOTE: the CHECK lines in this function use x86-64 registers and mnemonics
; (movq/subq/sbbq); they are not expectations for an aarch64 target.
define i256 @sub256(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: sub256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: movq %r8, 24(%rdi)
; CHECK-NEXT: retq
entry:
; One wide subtraction; splitting it into machine-sized pieces is left to the backend.
%0 = sub i256 %a, %b
ret i256 %0
}
; A 128-bit value as two i64 limbs. The function below treats field 0 as the
; low limb: its borrow is fed into the subtraction of field 1.
%uint128 = type { i64, i64 }
; A 256-bit value as two %uint128 halves; field 0 is likewise the low half.
%uint256 = type { %uint128, %uint128 }
; 256-bit subtraction implemented as two inlined usubo-style procedures on the
; U128 type { i64, i64 }.
; This is similar to how LLVM legalizes types in CodeGen.
; Computes *%0 = *%1 - *%2 limb by limb. Each borrow is an explicit
; `icmp ult` result that is zero-extended and subtracted from the next limb.
; No borrow out of the top limb is computed.
; NOTE: the CHECK lines in this function use x86-64 registers and mnemonics;
; they are not expectations for an aarch64 target.
define void @sub_U256_without_i128_or_recursive(%uint256* sret(%uint256) %0, %uint256* %1, %uint256* %2) nounwind {
; CHECK-LABEL: sub_U256_without_i128_or_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq (%rsi), %rcx
; CHECK-NEXT: movq 8(%rsi), %rdi
; CHECK-NEXT: movq 16(%rsi), %r8
; CHECK-NEXT: movq 24(%rsi), %rsi
; CHECK-NEXT: xorl %r9d, %r9d
; CHECK-NEXT: subq 16(%rdx), %r8
; CHECK-NEXT: setb %r9b
; CHECK-NEXT: subq 24(%rdx), %rsi
; CHECK-NEXT: subq (%rdx), %rcx
; CHECK-NEXT: sbbq 8(%rdx), %rdi
; CHECK-NEXT: sbbq $0, %r8
; CHECK-NEXT: sbbq %r9, %rsi
; CHECK-NEXT: movq %rcx, (%rax)
; CHECK-NEXT: movq %rdi, 8(%rax)
; CHECK-NEXT: movq %r8, 16(%rax)
; CHECK-NEXT: movq %rsi, 24(%rax)
; CHECK-NEXT: retq
; --- Low 128-bit half: load limbs 0 and 1 of the minuend (%1) and subtrahend (%2).
%4 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 0, i32 0
%5 = load i64, i64* %4, align 8
%6 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 0, i32 1
%7 = load i64, i64* %6, align 8
%8 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 0, i32 0
%9 = load i64, i64* %8, align 8
%10 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 0, i32 1
%11 = load i64, i64* %10, align 8
; Limb 0: %12 is the difference, %13 is the borrow out (minuend < subtrahend).
%12 = sub i64 %5, %9
%13 = icmp ult i64 %5, %9
; Limb 1 before the incoming borrow: %14 is the difference, %15 its borrow.
%14 = sub i64 %7, %11
%15 = icmp ult i64 %7, %11
; Apply the limb-0 borrow to limb 1; %18 is the borrow caused by that step.
%16 = zext i1 %13 to i64
%17 = sub i64 %14, %16
%18 = icmp ult i64 %14, %16
; Borrow out of the low half: set if either limb-1 step borrowed.
%19 = or i1 %15, %18
; --- High 128-bit half: load limbs 2 and 3 of both operands.
%20 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 1, i32 0
%21 = load i64, i64* %20, align 8
%22 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 1, i32 1
%23 = load i64, i64* %22, align 8
%24 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 1, i32 0
%25 = load i64, i64* %24, align 8
%26 = getelementptr inbounds %uint256, %uint256* %2, i64 0, i32 1, i32 1
%27 = load i64, i64* %26, align 8
; Limb 2 before the incoming borrow: %28 is the difference, %29 its borrow.
%28 = sub i64 %21, %25
%29 = icmp ult i64 %21, %25
; Limb 3 minus the limb-2 borrow; no borrow out is computed here.
%30 = sub i64 %23, %27
%31 = zext i1 %29 to i64
%32 = sub i64 %30, %31
; Apply the low-half borrow (%19) to limb 2; %35 is the borrow from that step.
%33 = zext i1 %19 to i64
%34 = sub i64 %28, %33
%35 = icmp ult i64 %28, %33
; Propagate that second borrow into limb 3 to get the final top limb.
%36 = zext i1 %35 to i64
%37 = sub i64 %32, %36
; Store the four result limbs (%12, %17, %34, %37) through the sret pointer %0.
%38 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 0, i32 0
store i64 %12, i64* %38, align 8
%39 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 0, i32 1
store i64 %17, i64* %39, align 8
%40 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 1, i32 0
store i64 %34, i64* %40, align 8
%41 = getelementptr inbounds %uint256, %uint256* %0, i64 0, i32 1, i32 1
store i64 %37, i64* %41, align 8
ret void
}
```
```arm64
// Four-instruction chain: subs sets the flags, each sbcs consumes and
// re-sets them, and the final sbc only consumes them. The borrow stays in
// the carry flag throughout and is never moved into a general register.
sub256: // @sub256
subs x0, x0, x4
sbcs x1, x1, x5
sbcs x2, x2, x6
sbc x3, x3, x7
ret
// No sbcs/sbc is used in this listing: every borrow is materialized into a
// register with cset and then subtracted with a plain sub/subs.
// Operands are read through x0 and x1; results are stored through x8.
sub_U256_without_i128_or_recursive: // @sub_U256_without_i128_or_recursive
ldp x9, x12, [x1]
ldp x10, x11, [x0]
ldp x13, x15, [x1, #16]
// Limb 0 difference; the following cset captures its borrow (lo = carry clear).
subs x9, x10, x9
cset w10, lo
// Limb 1 difference before the incoming borrow is applied.
subs x11, x11, x12
ldp x12, x14, [x0, #16]
// If limb 1 did not borrow (hs), compare its result with the incoming borrow;
// otherwise force flags to #0 (carry clear), so the next cset yields 1 either way.
ccmp x11, x10, #0, hs
// Combined borrow out of the low two limbs.
cset w16, lo
sub x10, x11, x10
stp x9, x10, [x8]
// Limb 2 difference; x13 becomes the limb 3 difference; w11 is the limb-2 borrow.
subs x12, x12, x13
sub x13, x14, x15
cset w11, lo
// Apply the low-half borrow to limb 2, and the limb-2 borrow to limb 3.
subs x12, x12, x16
sub x9, x13, x11
// Borrow from applying the low-half borrow, subtracted from limb 3 as well.
cset w11, lo
sub x9, x9, x11
stp x12, x9, [x8, #16]
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy0WV2PozoS_TXOi9URLoOBhzx0pie72u1ZaaU7V_PW4sOdeJdgxjad9Pz6KxsIkBCSVt9ptSBA1alTVfYxH4nWYltyvkLBGgVPi6Q2O6lWe5UYLfaLVObvq50xlUb0EcEGwSYpxBuHZVG87ZdSbRFsMo5g8wvB5vuf3_713x_Ie0LeY7P9Y8fxqywKeRDlFuu6qqTmefGOealrxXGlZMUV3vKSq8QIWWL5inWdaqOSzB0fhNnhVColD1iW-BgxBF-w2XGcyZxveYlfpcJJorId87HQuC5fpTJ1mRhevONSGlxX2EhcJWo5pDbOaivMrk6Xmdwj2Njc2t1DpeT_eGYQbNJCpgg2JI3zMM9jnlISh74Xhinzc8Z8zycxTfMw85OcBbzHMVxb_y8y5__gJYLND5vERtdplij1viwKBPQ5iNjDMwtgyBExr_l3SM0pusYmUVtusFGiKjhG9AkjgGPEXpj_UJf_L-WhfChEWR8ftmWNoIWc9GoLN-PWbHP-KkqOBQQMI9_TdQoBQxA1JyBIbFe6gxRBjEtZlwdR5hiF6576l39-_fLvh-fH9ddnRB9xi0MfzyzsteYPAXWQ6dKzJ9tDXhr1fo76n68__mgd9_Ltp7VTubDE7K_kOGOv6_SnIxquUbD2HmIUPCFYo_DJ_kNkAXSFIO7QtJhDSz-Ils9y-yha9reiRfeUOTtac8JaAFv1-K72OL_oo2667erHvCLrBP6dboqbn83lZrB1Y9QOycBz80fXKR5NADf2GyvFzema14YJn0aTG4JalIZA5MDMe8VtY7BgvptMzMe9R2Pr8Ia2PUYb_oR4FouusdViCNhDKoxlflJYsa8KvuelaQS41laqzUFiURai5DmudZ1KK9UZz2vFtVPc7zbKFc7LYVChrSprsRdFoqwQ7-QBPz__-Q0XfJsU4hd3MBqLErcSuRxpzpsUeas5L98hYC92SZC1eREEohepXhTPaqXFG2862xYKwSPWipuzk3HTkL5arSmCgEyehY-I2S2Cdwvd36htrX3UTZz7dMJ5RVNu-Zz0ObeTDoz8bgrJaWqO3GaV9ihV4azivLOP81s638vU8U56mpu0AU9vgff6crx7uXCLz4TXbSGPptxmG9ToP_jeXXk3xoG6L5Gz5aDllhzvXQzEcDG42y0aLT13uzVryKldt_z65cAtAL7T4S03vNHOyigsylTWZa5HGnJNZ6xUuh4ICsMffYjAhShkkg_ktYVwh0khtiWOBi7s97AigxDhDCt2yQpBEH2CFNwuFYIgnuEUTVeKeL-H1bBUhMzwIt5ksQj0dxZ2MYUgaCnEQ2TqrES2r3BdmFlT_xwwbK3IiGswiTht2wyzX_xosCAtHyNdcgOr8Dww8Ts0NrSLJiNfMW5aLVUXuEuZDJsLn2nu5TwgU7MT5poLE8115-H3EBt2B-gcMbhC7DN6djkdpis2J2hwRdHgM5I2Q2xUsTlNgwlRc-ej8-EN3f0jBEO7eHJ4TxtT7wKUdnbh0I6cz0CIxzPQGl1ICe0WfTpMn9KL6RxfTmd6oSOnZw5Kh3bTQnLF-EJJaDAR-kJJKHRoQ3Ggn1lpvDsWZW2k4ieBgvFAoVdWGhr_HlZkmlV4xiqeZuV_RiIvWV1O-BEren77Mr34-eT3kOpKNeZ0VimfTFTKPsXb58_zB_juvdzky7pE7bvxe3q_hZvXjIN3Z22E5k_XqXbPM458u_XPbNKssXFE222nHqeLblS22_Mgaeb2R6cp7Tbs31ecKN9-im2fWAcp3fIZMSnyqmESN6k00zlYHwkK2hL3Ns2d0pGQzsjrjc7xSJNUe1vgAN1YoYT1Tqdit9Eb_HgMmWlu7P7QXC7klYYRMmBnM7lCrOlIe1fjkrgk1oW22jnE7mzdfqevEWVXiE6U0R6NzUw1roflGF1Q69OG
Qeds2a8EpYO0bVuuUCfzNR4HY30fR6OIdgl-MMgQJJ7EsNUZMon7Cl3t4mlCnXRhka9oHtM4WfAVCQGiGCLiL3arOCQs8kkY-xnJsldgPPQYp7lHeUjSmC3ECjzwvcgLgIBH_GWaMuIlnDHwwteYZcj3-D4RxemzzEJoXfMV8cBjsCiSlBfafeMBKPkBu6sIAAVPC7VyHzrSequR7xVCG93DGGEK93HosflQgIIn_E1ozXMsKyP24pd7a9i-_pr8bLOoVbH68NcWx1Aj2LQpvK3grwAAAP__oq_fCg">