<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/54714>54714</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[Clang] Missed optimization with hand-rolled branchless assignment
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
AMS21
</td>
</tr>
</table>
<pre>
Given the following source code [godbolt](https://godbolt.org/z/qT6YPreza):
```cpp
char to_lower_1(const char c) { return c + ((c >= 'A' && c <= 'Z') * 32); }
char to_lower_2(const char c) { return c + (((c >= 'A') & (c <= 'Z')) * 32); }
char to_lower_3(const char c) {
if (c >= 'A' && c <= 'Z') {
return c + 32;
}
return c;
}
```
compiling with `-O3` produces the following assembly:
```asm
to_lower_1(char): # @to_lower_1(char)
lea eax, [rdi - 65]
cmp al, 26
setb al
shl al, 5
add al, dil
ret
to_lower_2(char): # @to_lower_2(char)
lea eax, [rdi - 65]
cmp al, 26
setb al
shl al, 5
add al, dil
ret
to_lower_3(char): # @to_lower_3(char)
lea eax, [rdi - 65]
lea ecx, [rdi + 32]
cmp al, 26
movzx eax, cl
cmovae eax, edi
ret
```
and the following IR:
```llvm
define dso_local noundef signext i8 @_Z10to_lower_1c(i8 noundef signext %0) local_unnamed_addr #0 !dbg !7 {
call void @llvm.dbg.value(metadata i8 %0, metadata !14, metadata !DIExpression()), !dbg !15
%2 = add i8 %0, -65, !dbg !16
%3 = icmp ult i8 %2, 26, !dbg !16
%4 = select i1 %3, i8 32, i8 0, !dbg !17
%5 = add i8 %4, %0, !dbg !18
ret i8 %5, !dbg !19
}
define dso_local noundef signext i8 @_Z10to_lower_2c(i8 noundef signext %0) local_unnamed_addr #0 !dbg !20 {
call void @llvm.dbg.value(metadata i8 %0, metadata !22, metadata !DIExpression()), !dbg !23
%2 = add i8 %0, -65, !dbg !24
%3 = icmp ult i8 %2, 26, !dbg !24
%4 = select i1 %3, i8 32, i8 0, !dbg !25
%5 = add i8 %4, %0, !dbg !26
ret i8 %5, !dbg !27
}
define dso_local noundef signext i8 @_Z10to_lower_3c(i8 noundef signext %0) local_unnamed_addr #0 !dbg !28 {
call void @llvm.dbg.value(metadata i8 %0, metadata !30, metadata !DIExpression()), !dbg !31
%2 = add i8 %0, -65, !dbg !32
%3 = icmp ult i8 %2, 26, !dbg !32
%4 = add i8 %0, 32, !dbg !32
%5 = select i1 %3, i8 %4, i8 %0, !dbg !32
ret i8 %5, !dbg !34
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
```
So the first and second implementations somehow confuse LLVM into generating more code.
I would also note that GCC generates just a single `cmov` instruction for the last function, whereas Clang emits an additional `movzx` before its `cmov`.
gcc assembly:
```asm
to_lower_3(char):
lea edx, [rdi-65]
lea eax, [rdi+32]
cmp dl, 26
cmovnb eax, edi
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzdWE-TmzYU_zT4osGDBRj74MPGm2RyyLTT9NJcdoQksBqBqCRs7376PgnsBWKnsbOnejAI6f1_vyc_OVfsefNR7HmN7I6jQkmpDqIukVGtphxRxTgK0nelYrmSNkgfA7zaWduYIH4I8Ae4-qW50iW8vcD3nz-Xf_2u-QsJ8NqRRY9BdLovo-6iTdPN0B3RyKon0Mv10wLEU1Ubi_w8BQkoyN4hzW2ra0RRgN_Bd-XIUBC_D-JHeM3AlAyeS7gcTbzt57_C14vADyjG3hxgzx6HJo0NwD9vwAUbOl1L1C9NzLjNkviyJR0pgo8o0K1hGLK7z8grMCserJ-NGxKeKV5NP2V05ImqGiEdjg7C7hCshr_FcEeNVqyl3EzQRozhVS6fr6GFmKqbGQMF4tJBDF35BDhGQRJd5BoFQnLin5wcA7x1iNdMoBAtUwf5ESmtGv8k0lFCsEerhtu8Xx3P7-SAKx0vEsYGi0zI77I08R7f5T3-n3gf3-V9_Aben0npkLQvnttCVan9y_FVK5VTZrUn_LzMmbgSlYv1R2o2qbBPf7zW1ndVxXghao6YcbGiRKJatTVMIiPKmh8tEisXyqevi0EpwYa0gvkpaYDTyO01XtBTW9ek4uwJcqxdRiK4LVheukc23I-AWKK9EswpknJfzYFqviey5aCn4pYwYok3xCvYovMciFok05nHT--PjebGCFX7vXrtr-1A_-KMQxCJkdsrHRQHKkKAwJhlOWCJPYtwaW6l7flwn-vrbIlnM1xyCkwLL8mRA3-M-0E04c8G_OnE0qSjTac8qxMPQKUnnXqznu7ld6MB_zIacPR2cMD4Djjg-GY44OQuOIzY7oADTm-Hw-v2cxUOOHszOMS_DofV28Eh_m7mJ-AQL26GA-TrHjiM2JJLmjogXOFIrwPoBIeBqEtCriIiTi4i4oQLKonm_52WYfDHY9cOn-Lc_3BZq0XeWugQO0C4Bhba76o1FnrH0uUMUFVozuGhOW21cSPzXFOPNvixY-AQYbUC0B6ElH3r2h4sySUcZjAuNMAubJSoLYd2ANpeONVgxwAPR1CJOpS8JDLcQ1CVDg-C2d2ZMurJahVaTZoGfl_DigwIrG5Poowl9FsIpttOUt4WBdehES_8TL7qaS3RJbchbdrz0nG1DJfJeL3gBDzi5kwE_Qc9gpAtDIqj0d2oqo7dAHrr8wB3o-MqO8lsaz7SWPKaa0H98int46wszlmpXRnLHDx8zcmVTJgGciVJl4NBWn58jviiujZGaDgGuabGcDgSQW1UjeQVry2xUMFwWK34Th3gtFoXLeDBQRFBdhXy3gARdEAVwMWfZ-ed7E_ooFrJoD0zCgy2HFQRiz5ut4hQ24Jjzyd28Nv54HoyEGsgvdTrLZT29kkC5hVt3c06CnAbqQLZg0JSfANWScAEpriZD_0rKb1w9rl-6hl3vld6UzboTcMfNbHDfhdA8YMWll1sYV086vyWHnXGNjFbx2sys8JKvgHdWxcZUIw-CwgERK2xohIvXWL96XEHidfQyMJirklNd9JtAhA2UToIzFotN5O_JICtzedwCoUXB4b-4erwb6hDeAVlrSuhD2mSLZLZbsOjdZrndL3MckYyvMzpar2mhERRQiPYM2aS5FwaZ7IrfX5AXoSrm_RxJjYYiKIkwtEqiePVPMmTRbGIsyyFco3SDDZIXhEh536bVLqc6Y03KW9L43ZPYax5XfTe1dxHyMknrd0pvXn4_AUvZl7xxhv-L_Sg3IE">