<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/104646>104646</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Identical instructions after non-taken conditional jump and at start of taken jump should be hoisted before conditional jump
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Validark
</td>
</tr>
</table>
<pre>
I have encountered this many times, but here's how I triggered this behavior recently:
```zig
/// Reproducer: a scalar loop accumulates a `tween_bits` mask, which is then
/// splatted and combined with the vector .{ a, b, c, d }.
/// Only `tween_bits` reaches the return value; `start_bits` and `end_bits`
/// are accumulated but never read afterwards.
export fn foo(
a: u64,
b: u64,
c: u64,
d: u64,
e: u64,
f: u64,
) @Vector(4, u64) {
// Union of the four lanes; the loop consumes its set bits from the bottom up.
var all_ored = a | b | c | d;
var tween_bits = @as(u64, 0);
var start_bits = @as(u64, 0);
var end_bits = @as(u64, 0);
// When all_ored is zero the loop is skipped and tween_bits stays 0.
while (all_ored != 0) {
const start_bit = all_ored & (~all_ored +% 1); // the lowest set bit of all bitstrings in vec
start_bits |= start_bit;
const after_start_bit = ~blsmsk(all_ored); // all bits after the start_bit (exclusive)
// Pick `a` if it has start_bit set, otherwise `b` if it does, otherwise `e`.
var target_with_end_bit: u64 = if ((a & start_bit) != 0)
a
else if ((b & start_bit) != 0)
b
else
e;
// Mix in `f`, then keep only the bits strictly above start_bit.
target_with_end_bit ^= f;
target_with_end_bit &= after_start_bit;
// Lowest set bit of the masked value (`~x +% 1` is the wrapping negation of x).
const end_bit = target_with_end_bit & (~target_with_end_bit +% 1);
const after_end_bit = ~blsmsk(target_with_end_bit);
end_bits |= end_bit;
// ~(~end_bit +% s) equals end_bit -% s, with s = start_bit << 1.
tween_bits |= ~(~end_bit +% (start_bit << 1));
// Drop everything up to and including end_bit before the next iteration.
all_ored &= after_end_bit;
}
const tween_bits_vec: @Vector(4, u64) = @splat(tween_bits);
// Lanes with every tween bit cleared.
const vec = @as(@Vector(4, u64), .{ a, b, c, d }) & ~tween_bits_vec;
// Shift each lane left by one, confine it to the tween mask, add and xor the mask, then merge the lanes back in.
const final_vec = ((((vec << @splat(1)) & tween_bits_vec) +% tween_bits_vec) ^ tween_bits_vec) | vec;
return final_vec;
}
/// Returns a mask covering the lowest set bit of `x` and every bit below it,
/// computed as `x ^ (x - 1)`. The subtraction is the plain (non-wrapping) `-`.
fn blsmsk(x: u64) u64 {
    const minus_one = x - 1;
    return x ^ minus_one;
}
```
Assembly currently emitted for `foo` (target-cpu znver4):
```asm
# Emitted code for foo. As used below, rdi, rsi, rdx, rcx, r8, r9 hold a, b, c, d, e, f.
# r10 is the tween_bits accumulator; it is zeroed separately on both paths into .LBB0_2.
foo:
mov rax, rsi
or rax, rdi
or rax, rdx
or rax, rcx # rax = a | b | c | d (all_ored); this OR also sets ZF for the je
je .LBB0_1 # all_ored == 0: skip the loop entirely
push r15 # save the registers the loop uses as temporaries
push r14
push rbx
xor r10d, r10d # tween_bits = 0 (loop-entry path)
.LBB0_4:
mov r11, rax
neg r11 # r11 = -all_ored
mov rbx, rax
mov r14, rsi # r14 = b (provisional selection)
and rbx, r11 # rbx = start_bit = all_ored & -all_ored
xor r11, rax # r11 = all_ored ^ -all_ored = after_start_bit
test rbx, rsi
cmove r14, r8 # b lacks start_bit: select e instead
test rbx, rdi
cmovne r14, rdi # a has start_bit: a overrides the selection
add rbx, rbx # rbx = start_bit << 1
xor r14, r9 # target ^= f
and r14, r11 # target &= after_start_bit
mov r11, r14
neg r11 # r11 = -target
mov r15, r14
and r15, r11 # r15 = end_bit = target & -target
xor r11, r14 # r11 = target ^ -target = after_end_bit
sub r15, rbx # r15 = end_bit - (start_bit << 1)
or r10, r15 # tween_bits |= r15
and rax, r11 # all_ored &= after_end_bit; sets ZF for the loop branch
jne .LBB0_4
pop rbx
pop r14
pop r15
jmp .LBB0_2
.LBB0_1:
xor r10d, r10d # tween_bits = 0 (loop-skipped path; duplicate of the xor above)
.LBB0_2:
vmovq xmm0, rcx # xmm0 = d
vmovq xmm2, rdx # xmm2 = c
vmovq xmm3, rdi # xmm3 = a
vpbroadcastq ymm1, r10 # ymm1 = splat(tween_bits)
vpunpcklqdq xmm0, xmm2, xmm0 # xmm0 = { c, d }
vmovq xmm2, rsi # xmm2 = b
vpunpcklqdq xmm2, xmm3, xmm2 # xmm2 = { a, b }
vinserti128 ymm0, ymm2, xmm0, 1 # ymm0 = { a, b, c, d }
vpandn ymm2, ymm1, ymm0 # ymm2 = vec = { a, b, c, d } & ~tween
vpaddq ymm0, ymm2, ymm2 # ymm0 = vec + vec, i.e. vec << 1
vpand ymm0, ymm0, ymm1 # ymm0 &= tween
vpaddq ymm0, ymm0, ymm1 # ymm0 += tween
vpternlogq ymm0, ymm2, ymm1, 222 # imm 222 = 0xDE: ymm0 = (ymm0 ^ ymm1) | ymm2
ret
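```
The listing above is what LLVM currently emits. The diff below shows the output I would expect instead:
```diff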
foo:
+ xor r10d, r10d # hoisted zeroing goes above the OR chain: xor rewrites ZF, so it cannot sit between "or rax, rcx" and the je
mov rax, rsi
or rax, rdi
or rax, rdx
or rax, rcx
- je .LBB0_1
+ je .LBB0_2
push r15
push r14
push rbx
- xor r10d, r10d
.LBB0_4:
mov r11, rax
neg r11
mov rbx, rax
mov r14, rsi
and rbx, r11
xor r11, rax
test rbx, rsi
cmove r14, r8
test rbx, rdi
cmovne r14, rdi
add rbx, rbx
xor r14, r9
and r14, r11
mov r11, r14
neg r11
mov r15, r14
and r15, r11
xor r11, r14
sub r15, rbx
or r10, r15
and rax, r11
jne .LBB0_4
pop rbx
pop r14
pop r15
- jmp .LBB0_2
-.LBB0_1:
- xor r10d, r10d
.LBB0_2:
vmovq xmm0, rcx
vmovq xmm2, rdx
vmovq xmm3, rdi
vpbroadcastq ymm1, r10
vpunpcklqdq xmm0, xmm2, xmm0
vmovq xmm2, rsi
vpunpcklqdq xmm2, xmm3, xmm2
vinserti128 ymm0, ymm2, xmm0, 1
vpandn ymm2, ymm1, ymm0
vpaddq ymm0, ymm2, ymm2
vpand ymm0, ymm0, ymm1
vpaddq ymm0, ymm0, ymm1
vpternlogq ymm0, ymm2, ymm1, 222
ret
```
[Zig godbolt link](https://zig.godbolt.org/z/f86jKsh7o)
```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "llvm_code"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-musl"
; Function Attrs: nofree norecurse nosync nounwind memory(none) uwtable
; Optimized IR for the reproducer: %0..%5 are the six i64 arguments.
; A single i64 accumulator is carried through the loop and used as the splat source in ._crit_edge.
define dso_local <4 x i64> @foo(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) local_unnamed_addr #0 {
; %9 = OR of the first four arguments; a zero result bypasses the loop.
%7 = or i64 %1, %0
%8 = or i64 %7, %2
%9 = or i64 %8, %3
%.not16 = icmp eq i64 %9, 0
br i1 %.not16, label %._crit_edge, label %.lr.ph.preheader
.lr.ph.preheader: ; preds = %6
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
; %.sroa.0.018 = remaining bits (starts at %9); %.sroa.05.017 = accumulator (starts at 0).
%.sroa.0.018 = phi i64 [ %24, %.lr.ph ], [ %9, %.lr.ph.preheader ]
%.sroa.05.017 = phi i64 [ %23, %.lr.ph ], [ 0, %.lr.ph.preheader ]
; %11 = lowest set bit of the remaining bits (x & -x); %12 = x ^ -x.
%10 = sub i64 0, %.sroa.0.018
%11 = and i64 %.sroa.0.018, %10
%12 = xor i64 %.sroa.0.018, %10
; Selection: %0 if it has bit %11, else %1 if it has bit %11, else %4.
%13 = and i64 %11, %0
%.not14 = icmp eq i64 %13, 0
%14 = and i64 %11, %1
%.not15 = icmp eq i64 %14, 0
%. = select i1 %.not15, i64 %4, i64 %1
%15 = select i1 %.not14, i64 %., i64 %0
; Xor with %5, mask with %12, then isolate the lowest set bit (%19) and y ^ -y (%20).
%16 = xor i64 %15, %5
%17 = and i64 %16, %12
%18 = sub i64 0, %17
%19 = and i64 %17, %18
%20 = xor i64 %17, %18
; %22 = %19 - (%11 << 1), OR-ed into the accumulator as %23.
%21 = shl i64 %11, 1
%22 = sub i64 %19, %21
%23 = or i64 %22, %.sroa.05.017
; Mask the remaining bits with %20 and loop while any are left.
%24 = and i64 %20, %.sroa.0.018
%.not = icmp eq i64 %24, 0
br i1 %.not, label %._crit_edge, label %.lr.ph
._crit_edge: ; preds = %.lr.ph, %6
; Accumulator is the constant 0 when arriving from the entry block, %23 when arriving from the loop.
; NOTE(review): this constant 0 and the loop phi's initial 0 are presumably the two `xor r10d, r10d` in the assembly - confirm.
%.sroa.05.0.lcssa = phi i64 [ 0, %6 ], [ %23, %.lr.ph ]
; %26 = accumulator splatted to 4 lanes; %30 = vector of %0, %1, %2, %3.
%25 = insertelement <1 x i64> poison, i64 %.sroa.05.0.lcssa, i64 0
%26 = shufflevector <1 x i64> %25, <1 x i64> poison, <4 x i32> zeroinitializer
%27 = insertelement <4 x i64> poison, i64 %0, i64 0
%28 = insertelement <4 x i64> %27, i64 %1, i64 1
%29 = insertelement <4 x i64> %28, i64 %2, i64 2
%30 = insertelement <4 x i64> %29, i64 %3, i64 3
; %32 = %30 with the splatted bits cleared; result = ((((%32 << 1) & %26) + %26) ^ %26) | %32.
%31 = xor <4 x i64> %26, <i64 -1, i64 -1, i64 -1, i64 -1>
%32 = and <4 x i64> %30, %31
%33 = shl <4 x i64> %32, <i64 1, i64 1, i64 1, i64 1>
%34 = and <4 x i64> %33, %26
%35 = add <4 x i64> %34, %26
%36 = xor <4 x i64> %35, %26
%37 = or <4 x i64> %36, %32
ret <4 x i64> %37
}
; Attribute group #0 (used by @foo). The quoted "target-features" value must stay on a single line.
attributes #0 = { nofree norecurse nosync nounwind memory(none) uwtable "frame-pointer"="none" "target-cpu"="znver4" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,+adx,+aes,+allow-light-256-bit,-amx-bf16,-amx-complex,-amx-fp16,-amx-int8,-amx-tile,+avx,-avx10.1-256,-avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,-avx512er,+avx512f,-avx512fp16,+avx512ifma,-avx512pf,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,-avx512vp2intersect,+avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,+bmi,+bmi2,+branchfusion,-ccmp,-cf,-cldemote,+clflushopt,+clwb,+clzero,+cmov,-cmpccxadd,+crc32,+cx16,+cx8,-egpr,-enqcmd,-ermsb,+evex512,+f16c,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,+fast-15bytenop,-fast-7bytenop,+fast-bextr,-fast-gather,-fast-hops,+fast-lzcnt,+fast-movbe,+fast-scalar-fsqrt,+fast-scalar-shift-masks,-fast-shld-rotate,-fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,-fast-vector-shift-masks,-faster-shift-than-shuffle,+fma,-fma4,+fsgsbase,+fsrm,+fxsr,+gfni,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,-idivq-to-divl,+invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,+lzcnt,+macrofusion,+mmx,+movbe,-movdir64b,-movdiri,+mwaitx,-ndd,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,+nopl,-pad-short-functions,+pclmul,-pconfig,+pku,+popcnt,-ppx,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefer-no-gather,-prefer-no-scatter,-prefetchi,-prefetchwt1,+prfchw,-ptwrite,-push2pop2,-raoint,+rdpid,+rdpru,+rdrnd,+rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,+sahf,+sbb-dep-breaking,-serialize,-seses,-sgx,+sha,-sha512,+shstk,-slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,+slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,-sse-unaligned-mem,+ssse3,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-usermsr,+vaes,+vpclmulqdq,+vzeroupper,-waitpkg,+wbnoinvd,-widekl,+x87,-xop,+xsave,+xsavec,+xsaveopt,+xsaves" }
```
[LLVM godbolt link](https://godbolt.org/z/z4bq3f91f)
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzsOktvpDqXv4ZskEtgqEcWWXTS3dLV3G810l3MJjJwKNwxNrENVenF_e0jPwBDUd35Rt9qdKOo6vi8fXweLqqIUvTMAZ6i_XO0__pAet0I-fQXYbQi8u2hENXH0x9xQwaIgZei5xokVLFuqIpbwj9iTVtQEX6Ji17HDUiI8FHFjbjEf8Ra0vN55i-gIQMVMpZQAtfsI8q-RMnXKBlfD4n7_0nPDgPXTkgd1zyuhYjwyWFjEmVf4v6QR_jFY-I4LraQ5Ray2kLCDbJeYyL8GEd58heUWsgInwzFMTzG0fF51jUQGRPGXoXZepR9jUkcHV_iwr6W9rWKsudw71ZGXwD4a0G1slJRnhAV4ZPzIU4i_DhJjWaUJlJ_UsTwA68-xz2buTSUQRzh07wlnBrxZLnxUnClZ4fcxmeRg1Hxd4B4jvA-Tp3NOMLfI_w91g3ETFzAKAIdGzWiNloMqLSk_KxiyuMBytlB8xfG4fhiTE-YRcxsUlhHSa1Bvi7d_btgqlVvwV6X7o2OOGHrbaAAn-Basl7RAYzYKozjiWkiz6BfL1Q3r_40fKJZF2htFBkPbMzmXZhYz3Ff6jV_ZIkCpmBWVnxGWXGr4dYMbGSI-dvYVhztvxkT9c0JbDLjg82Y5bHcObxJKPt6T5lNt21amHn3UyO0MSfG1vFtKJrrzGXjdNLrSAQl7zj_Nn6vfI3wKUzUlyh7cf6HlsNamyO5ZTg6fl0fodv47M2rqbDsy_1-59qH6hjRJiqT4DoYTvEA5aLj3FFroF10fI6JnSjmpTQvlXXZJu0h_nvt5Y21mnLCXiebtgLcv8PZ-AXe-1Ba7SvlFmvPYIOw_7aFPb7EC7ck6F7y2am5gJbHUPN4yrLrNHseXWcIx4tXaNisDxZANwkxqx_n6uawJar19oWYBvKYnq0Y7LskV3MOUlFPFzJeECq6FLyhX39NL1f0H-Ded38-Pyev6ZLY9aqxwul-gyDT_A57sTJy9V7INKmsF-bdcjiz-RyPKRBpajnJShWH80TfjmBxXQrOGvNlaP0f4dVCcK14dn7bI22GaCC_1l-2YoDA_mlDfJRdn66R5TDLrumkWvo-BX522gk-3tmzJ98Npt_zdNK_D3-6X0rcWNwvLa7DuxZUfbEQXOfWlOBp4uT3dwyT7eP9wV0FjJm4zGjR3Vo0yDF4dwgrH360ndePw7RPb9rA7woFzxJDK4Z3I9K2yWZlBwx4szUEDNlmdg1dIQWpSqK04Ys_2tafUbJm7HlXvrH36t1tw_s0mrbrXzu3rpoNlaOqbFK9lKBcgdQ0xafYe2ud-AidwGaerwwRXvGJa9zjx63LHamMM2u9H7eedC7pAs5k0v07nXc5NUjOxPl9ywHrMsYrPyTozwymitb1xmS6O5LWhffvTqZ7IynCz78tAzRWFCwn1iy7HGj4PzvQ0G8d_Geg_TPQ_v8OtKn81gMNrSfaZyvln4n2z0T7j020_fP_0HN8FlUhmI4Z5W_R_muET43WnTKJZp8u_aTnnefZCXk2mAh_r0-HH_-lmqNYPVJa2WNs8B_jouw5_peoegZ_fPWff4_PVJeigue-rkFG-Og4lehlCa81ZcBJC54ZG1Wvhj0at-meecQV0YSRD9HrkRVQG2VfAHX4mETZlwzbF7NMl0sDHXL7gugEpPgUZV9SfEL1KfEQt6jDKH7I0X8btpUrWtKOTR5fT4fXQ456_sbFhSNGeX9Fba_YJDaH5nvPS00Fj79oLU3wYy5qCRBzIaHspTKQ-uBlzEXPL5RXcQutkB8RPnHBwX4iv2hSMP9wrIKacogrJV6ZKAmLo-wlj6-x3ea3OMoT9-yamo_xeG8TysNpAOMAzgI4D-C9MW6NvPbcHFn1SqpKxhHOkvARQYT3RxsbIRemrPmZ57TiOXoeHPA8rnhOnifzPBHe
77jQ6cE9vCzbLob3kfnRPVX22goZ03QWMDRGCmAW9VpKql-hOsMSz-Sua3adhAZIBTI8zRtatnhQuflnMqCTUPnn33h_CLxbWb21ZR-IrTXcuOECFOqwkdwpKcgu2SWpi3vXUBen_bMNer4QjG2HeBmpjwvibMyy3djY75L0uGkku2sk-ZyFNLF6zWXB6J2k5s2FzKn7EoBXY0aEfE4yDTMyxVbgOufb7wSytYV0K9NtxuVbKZpmixw1mPyuynStcr-pMl-r3LmYAYNShzWwv1PnoR1v41Y4FNgF8GIvh3U8nU3bSwK2482WD-OWw2bgM3d9-Okx5Hm80TX2lUVu4OTGtW0-l0OqYcvjCGOE8cIv68XYyxZ82aqbYbzMX1s4ocBNLuBfZ7w5ma2cwPn9VvhvNMJFSwo4P9H5ttufV-u2dNjsJDtWKkVuuskYh8OqU221mGlUYF8w9v4IDFrg9vuMdJ6XnaBK8DC3V56MpDDP8cEnSV_XDAb7zcJKr7VuXbtjbhzb5trxLf4JUlBONSWM_hznjlVy3N5C_ostJJs-n36ryJrbui8skvrxU3pOW3eNsLiz5FN6HrfuKVmoJ50K-1b64CNthNC0nTtQ9i1Ui6dSvFGbjcmYhYHJsqlz3ErgwJEgrLfA0on8F06MiY_DOspcxpNqSyLflDjcj1-235SYLnu3AmMfz6ajlrBxrJnvequvpIjWkha9BuXvmKZrHJ__71dmc1uvJWkBdYJybW5LOMq-Rhg7RmwY3B0flV0_UX_yAWS-pNdAdC9BTUwoPRRUo9Z-dHlBGV6sKi4uE2C6SISfD7n9-taApLp6wP6cxQCMiQti9NxohPcH5FgRaa-oqO2AtHAp2o7BdVzW3UyiXJ9GWFMGXu_gmIdrmuxSozpcmok7ss3QPsXe5rymmrDzAnMJV2UVrqp3b2SfYntHnSj1TPC-TyRat2SmdnVIG4qWrtcLfwe2WHFOZ1VDh-3pKyj1gqsTXcn15GzgAIdSmCTQfh0oNCDlOj0s1yenefJzdrCQhJdN3Stq2zQqy7az7zYWJaugFdqfVslq1qtGdHpcX4oRMkPCw60YrGzbleWVVD72pSwzb7O8jrEtrzYp4NxJ-87fy7aykGyV1w0DXKdMqNNDaeg1YQpQBZ1CZ9At4XqFZT9LrpE2rytK27O1irZn7H2F60C2a5Q9kRVSEm7vJ6gmSqM0LT40cNF5dy1uP-Mc1_GWqYCrlhPDmegG5mUjOhXwMr-rcd2KoYBgrUrCiES1epf6Fq0aWmvUEvWmJgOqYRWSQhM9b2Ugkpo-hUoplGKEA_KXikDnxNSBvMdiryCTNyhEbvgCI1Y3hK_UuQqoW5J7hDqrgqiRrOyJGeiqfF2fa1caDZEVcKSYQvSHy_AAJcF61khQDqIVHRjSAlV0KEbEu0f4YqZ86Epqs_XNoBADglQ3Qr0ChcjZLgeKypqOIBOk8tYpdwwXnwrBybaklGKqS4NofVcej9uce0XlIS_mha_v9kKotq2V2_pDXKDioyNKoQoY-dhAoYIB3-JFvp7X6MXRcNHZGHSkQqoRUqPaP1zyiduVrO0dSyl4TX237t56D0y11XXW8U5CDRKl-DROG48J5o_HmPxBEs7UZI8KCWJo1RsSA0g0aFChEBdBlc04VRKtA6QuGxouLjr1Dsu6bOwU7fRFUlc4Xa8a3InOdCskiZnrjltWHa0mUPYjKPmEVQA2_BJ0JxjlsFgguGqQnDCkm56_LWmUV1RCqZHr5nZqb5FLwpijaV8pijR-lKmiMA0NFRLIm89LBdJd-t1COcXq7BNRNbYeVUOm_qwapa1zylwYMmE6MZAJQXlZQTktQ1LXkqq6VMG6Z8wHxyJMj5qo-iJQCy1yjdHhek4YPXOoLMWNwA2CnUJI2Sf6SLW5fRO1RjUTxB-XGnuKUoAnKJugfJcG8MyRu_0oWNoc6U4F0uR8hgqdmSiIOw9d2Fmj1ZVVWtoA6t50B9ctadsu
Oqah95S7gdErQGfWms6ETI9FpVBajRTFWkQk1c0CLduxQQ7THW9wJfpubxxmbUZ633WuGExD6d581V4KLigf7HFcaAVvviVeT-YjGrqOk-2qyAABWAbwdIuwK2Xvs7_-ldb--c8___rXb787uP3e4GdevGf1Y1pP3xs8VE9Z9Zg9kgd4So84O-XHU5I-NE-P-wLyDPAhS8jpMdkfH_O6qg8k2e-zU5lXD_QJJzhPTukB4-yQp7uM7E9JjYtTdjicMECUJ9ASynaMDa1x4oEq1cNTmuSH_PBgn2Io-xtzjDlcYks19_b91wf5ZIRQ0Z9VlCeMKq1mNZpqBk9_VMA1LQkzn0617F2P9T_C5YIjTd6Ax6XgFTUkwuIffdvZz2nE_yQ5FnXs2CxJNaJnVVxA3AjTQg1YCwk3Sh56yZ5Wwaa66YtdKdoIf7dfvLg31Enxw95ov9sNqgh_9xEYnvD_BgAA__-Vh_pF">