<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/61963>61963</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[Loop Predication] Miscompile due to widening into loop invariant condition
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
annamthomas
</td>
</tr>
</table>
<pre>
This bug manifests when IndVarSimplify transforms the phi value of an exit block to take the value at the first iteration of the loop (`rewriteFirstIterationLoopExitValues` at https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp#L431).
The transform relies on the condition to the exit block being a loop invariant one. Then loop predication's predicateLoopExits transform comes along and changes this loop-invariant condition to a varying one. This means we can now get an incorrect deopt state passed to the interpreter.
Here is the initial IR:
```
; ModuleID = 'bugpoint.ll'
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
target triple = "x86_64-unknown-linux-gnu"
@global = external global ptr addrspace(1)
define i32 @foo(ptr addrspace(3) %arg) !prof !0 {
bb:
%alloca = alloca i1, align 1
%getelementptr1 = getelementptr inbounds i8, ptr addrspace(3) %arg, i64 32
%load2 = load i32, ptr addrspace(3) %getelementptr1, align 4
%getelementptr3 = getelementptr inbounds i8, ptr addrspace(3) %arg, i64 40
%load4 = load i32, ptr addrspace(3) %getelementptr3, align 4
%getelementptr5 = getelementptr inbounds i8, ptr addrspace(3) %arg, i64 56
%load6 = load i32, ptr addrspace(3) %getelementptr5, align 4
%getelementptr7 = getelementptr inbounds i8, ptr addrspace(3) %arg, i64 72
%load8 = load ptr addrspace(1), ptr addrspace(3) %getelementptr7, align 8
%getelementptr9 = getelementptr inbounds i8, ptr addrspace(3) %arg, i64 80
%init_val = load i32, ptr addrspace(3) %getelementptr9, align 4
%getelementptr11 = getelementptr inbounds i8, ptr addrspace(3) %arg, i64 88
%load12 = load i32, ptr addrspace(3) %getelementptr11, align 4
store volatile i1 true, ptr %alloca, align 1
%load13 = load volatile i1, ptr %alloca, align 1
%icmp = icmp eq ptr addrspace(1) %load8, null
%getelementptr14 = getelementptr inbounds i8, ptr addrspace(1) %load8, i64 8
%call = call i1 @llvm.experimental.widenable.condition() #1
%getelementptr15 = getelementptr inbounds i8, ptr addrspace(1) %load8, i64 20
%widenable_cond11 = call i1 @llvm.experimental.widenable.condition() #1
br label %loop_outer
loop_outer: ; preds = %bb34, %bb
%phi = phi i32 [ %phi36, %bb34 ], [ 42, %bb ]
%phi18 = phi i32 [ 2, %bb34 ], [ %load2, %bb ]
%phi19 = phi i32 [ 0, %bb34 ], [ %load4, %bb ]
%phi20 = phi i32 [ 1, %bb34 ], [ %load6, %bb ]
%phi21 = phi i32 [ %add39, %bb34 ], [ %init_val, %bb ]
%phi22 = phi i32 [ %phi35, %bb34 ], [ %load12, %bb ]
%icmp23 = icmp sgt i32 %phi19, 37
%add = add nsw i32 %phi19, 1
%load24 = load atomic ptr addrspace(1), ptr @global unordered, align 8, !nonnull !1
%getelementptr25 = getelementptr i8, ptr addrspace(1) %load24, i64 16
%mul = mul i32 %phi22, 16777216
%add26 = add i32 %mul, -16777216
%ashr = ashr exact i32 %add26, 24
%add27 = add i32 %phi, 1
%icmp28 = icmp eq i32 %add27, 0
%load29 = load atomic i32, ptr addrspace(1) %getelementptr14 unordered, align 8, !range !5, !invariant.load !1, !noundef !1
%icmp30 = icmp ugt i32 %load29, 1
%and = and i1 %icmp30, %call
%load31 = load atomic i32, ptr addrspace(1) %getelementptr15 unordered, align 4, !tbaa !6, !noundef !1
%icmp32 = icmp eq i32 %load31, 0
store atomic ptr addrspace(1) null, ptr addrspace(1) %getelementptr25 unordered, align 8
store ptr addrspace(1) null, ptr addrspace(256) inttoptr (i64 8 to ptr addrspace(256)), align 8, !tbaa !8
br i1 %widenable_cond11, label %bb33, label %deopt9, !prof !10
bb33: ; preds = %loop_outer
store atomic i32 606, ptr addrspace(1) %getelementptr15 unordered, align 4, !tbaa !6
br label %inner_loop
bb34: ; preds = %bb54
%phi35 = phi i32 [ %ashr47, %bb54 ]
%phi36 = phi i32 [ %add48, %bb54 ]
%add37 = add i32 %phi18, 1
%icmp38 = icmp sgt i32 %add37, 12
%add39 = add i32 %phi21, 1
%icmp40 = icmp sgt i32 %add39, 4
br label %loop_outer
inner_loop: ; preds = %bb54, %bb33
%phi42 = phi i32 [ %ashr, %bb33 ], [ %ashr47, %bb54 ]
%phi43 = phi i32 [ 1, %bb33 ], [ %add55, %bb54 ]
%phi44 = phi i32 [ %add27, %bb33 ], [ %add48, %bb54 ]
%mul45 = mul i32 %phi42, 16777216
%add46 = add i32 %mul45, -16777216
%ashr47 = ashr exact i32 %add46, 24
%add48 = add i32 %phi44, 1
%icmp49 = icmp eq i32 %add48, 0
br i1 %icmp49, label %bb57, label %bb54, !prof !11, !make.implicit !1
deopt9: ; preds = %loop_outer
%lcssa = phi i32 [ %phi21, %loop_outer ]
%phi51 = phi i32 [ %ashr, %loop_outer ]
%phi52 = phi i32 [ %add27, %loop_outer ]
%call53 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 13) #2 [ "deopt"(i32 0, i32 1, i32 1291853205, i32 127, i32 3, i32 13, i32 0, i32 0, ptr addrspace(1) %load8, i32 3, i32 1, i32 3, i32 606, i32 7, ptr null, i32 7, ptr null, i32 3, i32 %phi52, i32 7, ptr null, i32 3, i32 %load2, i32 7, ptr null, i32 7, ptr null, i32 3, i32 1, i32 3, i32 0, i32 0, ptr addrspace(1) %load8, i32 3, i32 %lcssa, i32 3, i32 %phi51, i32 7, ptr null) ]
ret i32 %call53
bb54: ; preds = %inner_loop
store atomic i32 606, ptr addrspace(1) %getelementptr15 unordered, align 4, !tbaa !6
%add55 = add nuw nsw i32 %phi43, 1
%icmp56 = icmp ugt i32 %add55, 9
br i1 %icmp56, label %bb34, label %inner_loop, !llvm.loop !12
bb57: ; preds = %inner_loop
%phi58 = phi i32 [ %phi18, %inner_loop ]
%phi59 = phi i32 [ %phi21, %inner_loop ]
%phi60 = phi i32 [ %phi43, %inner_loop ]
%phi61 = phi i32 [ %ashr47, %inner_loop ]
%call62 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 12) #2 [ "deopt"(i32 0, i32 1, i32 1291853205, i32 99, i32 2, i32 13, i32 0, i32 3, i32 0, i32 3, i32 0, i32 7, ptr null, i32 7, ptr null, i32 3, i32 0, i32 7, ptr null, i32 3, i32 %phi58, i32 7, ptr null, i32 7, ptr null, i32 3, i32 1, i32 3, i32 %phi60, i32 0, ptr addrspace(1) %load8, i32 3, i32 %phi59, i32 3, i32 %phi61, i32 7, ptr null) ]
ret i32 %call62
}
declare i32 @llvm.experimental.deoptimize.i32(...)
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite)
declare noundef i1 @llvm.experimental.widenable.condition() #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) }
attributes #1 = { "inline-remark"="cost=never, reason=unavailable definition" }
attributes #2 = { "deopt-lowering"="live-in" "inline-remark"="cost=never, reason=unavailable definition" }
!0 = !{!"function_entry_count", i64 32768}
!1 = !{}
!4 = !{!"tbaa-access-type"}
!5 = !{i32 0, i32 2147483646}
!6 = !{!7, !7, i64 0}
!7 = !{!"int.array", !4}
!8 = !{!9, !9, i64 0}
!9 = !{!"pendingException_access", !4}
!10 = !{!"branch_weights", i32 1048576, i32 1}
!11 = !{!"branch_weights", i32 1, i32 983039}
!12 = distinct !{!12, !13}
!13 = !{!"llvm.loop.peeled.count", i32 1}
```
See that `init_val` is the incoming value for `phi21` which is in `loop_outer` (header of outer loop). `deopt9` is an exit block out of the outer_loop which being taken or not depends on invariant condition `widenable_cond11 `. Given these facts, we know that if the exit is taken, it will be taken on first iteration. So, `indvars` went ahead and did this transform for `deopt9`:
` %lcssa = phi i32 [ %phi_21, %loop_outer ]` => ` %lcssa = phi i32 [ %init_val, %loop_outer ]`.
Until this point the IR is correct. Here is the transform in complete: https://godbolt.org/z/PrTz6KK6b
Then loop predication predicateLoopExit transform folds the check in `inner_loop` to false by widening the widenable branch outside the inner_loop. This is the widenable branch at `br i1 %widenable_cond11, label %bb33, label %deopt9,`.
This is the result after loop-predication is run: https://godbolt.org/z/sq6d1vMWa
Loop predication's predicateLoopExit pass does two incorrect things:
1. It sinks the widenable call into the loop, thereby converting an invariant condition to a variant one (this was being fixed here: https://reviews.llvm.org/D146792)
2. It widens the widenable call thereby converting it into a loop-varying one.
Both of these together now mean that if `deopt9` is taken, it may no longer be taken on first iteration. This means the deopt state in `deopt9` block is incorrect, since it directly uses `init_val`.
The complete fix for this is by making sure we do the widening at the right point: which is, we need to widen at the widenable call (not at the widenable branch as it is done today, at `loop_outer` block) and do not sink the widenable call down the loop.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy8Wt9z47aP_2uUF449EvXTD3nYbJq7nbYzne5e7zFDSbDNi0yqJBUn_etvQEqyfnqz2Xy7s5EpigBBAPwAlMC05gcBcOvFd158f8Mac5TqlgnBTuYoT0zf5LJ8vf125JrkzYGcmOB70EaT8xEE-SLKv5j6yk91xfevxCgm9F6qkybmCKQ-cvLMqgaI3BMmCLxwQ_JKFk_ESGLYE9hhbggz9mbPlTaEG1DMcCmQErsrKWvi0cxLfAVnxQ084MAv3bjfpKx_eeHmL-SlvcRHfkdjau2Fnzz64NGHAzfHJt8W8uTRh6p67n42tZL_B4Xx6ENeydyjDyfGxWAMx75v_dI8-vC1YBVTHn0Yr39b1LVHw9-iMPDobks8_97zP7nrtyNc1EMUVBw0kcIurpCi5Ha1qJYjDBWVAxcHwpwCuHhmijNhiBSwJd_QBPZBraDkhdWER1Pd30OnFz2YvJAn0IRVEhmLkhRHJg6AJuPasttc5hmJxsgzU68oTzs91-QETGhyBlIwQYQ8kwMYtDUXhVQKCkNKkLUh2jADpGZaQ9ktkwsDqlZgQI2V9d-ggHDdDuKGs4p8-RMt6QYlfvvf3YZ35HdZNhV8uSdeeE88mubNoZZcmG1VeTR14wxTKFzJDKvYq2xMO5jC5uSFn2BT09T3wk8htRe8Dca32Eoie9nwrrHPkCig2UZk2Eg6miTafLXdHLs3Ne0eZO1fSN0z6lE6EtEoXlfQifeSJY9JtGnEk5Bnsam4aF42B9H0VO018g-VzFll6eDFgBKsIm1fbRRhZal0zQrwaIYeOqQuYc8FEB5S4kX-XkqPZlOa0KM74tGYqYNrBbWSe_z1iZfeOUZ53tuJ2MFVJQtmZWqbPPDoZ8IqfhAkGIw8gIEKTiBMbVRgKUZdhItcNqLUhGfI4ap4nwlPIhLSAf9KspJattjCpV7jMpbmInG0JnH4IRJH_kTi6H0Sh2-QOP4QieNkInHyPonjN0icfojE6dQrsovEi_vkrUtIL0vI2immQ3YfsoJs6CWIkI_P7b7_YbXv5mqfef_HbMYsm6g9eO9uXNiO2kgF5FlWzPAKCA-IUQ10LHscWkYeK0x4EWbA5o0ceHGqLb1twN-LftS7G_IQTVWt6Tv6cX3P-FuNDyQsWOU8xDZ4gDCPGc4WXmpQHKdh1fbMSxAsr2DbR37MuizvMFiT9x1AsigvHbp1L8ojitL64M8LT3JFKpZD5WaX9aNsDKhhLBz0hp8IpheYT-k2Hsd5HkYosW0OBMZ0F4fgr42j8V3bHSb9-DAiXnxvb-M7EtH-ge0eMQuyGTu6wqeLbte47Wbc_OvcoivcqD_jFlznllzjFiwpjpVluFtn2sHeEt-WLV2zR3xd1mBVkbi9aXjZ6fpgHO9Wx0gXpsP0pyxd7lOWROjzbPAUhugg3jMjT7y4GpIuSV8jpCpBQTmMQXYVgZAC0Qaba1uYLm7h7-5cGnVbN0gunE-NQxr8vayXWqUGSZqmNEjGKqJJr6SW4NRYw24m43G4Pio3GhvwworeBJYTktFowj-d8q-PfKp-a9tshOIDvja2T5MzuptZayWQBYuBLLpmNoXHMmy03hr0x7KtndCas7NwI0rYDy3crSj0LytqLt7qpJ-qAI-DVlGitCjbsWi3A6LvRANh8HMaiJc00CJPYHLG8Dd5wzLpkuGchCPLuUThytZykfmtC6CLC8jGs_3QNDRO8DEXxkiXeWQ2luOpeXlsiwdj7-l0lw3injPpNLAiQR8R8zwMRx32-N7CcH_eC_xhvLQ0S5FyGl4n2kcjJX7y4c7i5hrGeS4EqEcUZyJ3tBLh42gcocJ4MULpo4rSPlbE0Ty0hclKaIuydTqMfEuIFWRLmBVmi_HIMrHj6YT1boE1DZZYR_4qa-sS0duTqoEJVlXeB-VwrMRoMZCj9gckkzj-BttE4bUkZsawLOP4Or9oxdY0vcp2zRVcGIzihWAaXQmm0VIwjeIr4TRK1wNqtBhQo2zBh6Jo6EOdC-1WIqpbtT-DJ0c0AaU4nXZEU0jqYuGJPcHWvpEtuBmEiu49l4Wzt6EVdhZas5U0knbecqGcO0W8nNtefPcq8bLjDzxqlRpjdRwODk1W8dl2u7XQunR-srrhJ_4PbG0Mz5AmaI_g_fTUDvNoN8AmB3Zk36C7IItD6seXnrRrhn1f3_JHje-fFEdcZj1tRMFm2vHrYu1qZ0_d6f3No_tz1zsnnC_gZ9TReezauoIVoXZD71HQA4DzonHQjG3Q_JF_s602jcf_amLQg_nlYNacJ4ezKFyAsjhZTKX7wLBbxLI4mSZY0ahjoAsnr92Y7QevIKAT5aeL2DVXaGvw-TuEYSoxolyAn_k7gwnyXaNO5u8Ihsr9HvUqbvZRfZG8ddqEfgD00Q-Avt2ua9EryPeWnneAy3XaKThkHwpjnRP8HJpZL1zj_R40S7oNld6P84KiYqr_APYGD3HuNPoCF96Rh0YU9nvpJ2OUxr0qJE6bs-KJCLlXAERI_SoKgqfZMxcl0TUUTcUMnsnImVeVAtMoQU5wkuoVfU2wogCteY6Qd0KuClhpv4P3MnQr6A7J73pLOjrXMWMUzxsD2j5yWJPe_QsrIr11xjIEvQwepVxUXMBGwYmpJ9yU4b1HaSG18cJ7Ac9gMywFTEvhhfeNYM-MV1Yk-7WzXTpdm4wOJ7Pm31TyDIqLQz9bxZ9hwx2X_4RE7dV-YbVIH3jpHV4p3bee9gjCqNfHQjbCQVP36TNNsgsf2r5AbzkM-qMZZwyYG2efjXmtAbkOCOIBwQhkaBClURYmUTIcnoz5t-DtflFQfzg4nQnDhdkypdhruzaUeEiRjSm6NxW7Rfa7GfsaRMnF4ZeXAmqrTbfulckWzJArJorj4xn44Wg6OouIfpTFaXIByCGf4Af49JEkC_1wN2LjXLTk2nBRmAu_7gV2EISj8eFs2j7Z2NYAFZTbkR-N5Z6UXdjrVwBijswQL_H7N_KJf6neKOSJi0Nb5LOXCge6DCLxyfnIiyOO5QL7BycwTB5odgRWgiJyT9wRx2VJuy0Obk9ybq5xdZFsTFc4ZOlcjuAmcwU1hj2BIFIRIQ0pAb3AluMsVb54iT__FpX4W0L-iz-DLeHRQPasQLN9JmcgT0KenVb4_lLQgyrBaa1mjYVFkkMnipiWPm3JV2mtiIotn5myZU1nEIYw1It9SVvy0tXtXMp7Wh336hmWzXzvRPu4dqR1uZwX_kK-x2XyWWbGZ-uE-R9heOVEt4U6Vk1f_kQltYVDWzIsA7qsjwtSyFNdgQEMG5MiL1nmsjJbqQ4effjHow9_qG__JL_-muQjQLXXxfqpee3USLdV6eQpjlA8tW47SL4TnxhJ9qzSQPJXYv3G-tsRSO9ExO119E3NS2g3SsejLaxq1z0jclvtZ1_mXgzRqeIypQLdVIawfbfjNkP1cE1UI96ief13UgbPv_8vG07025vK1WyhGCklaGLOclBMZo5cHHTv0sGWfDFEc_E01ZZL_EVbadadrswRFOSvuLmfQRlbWre86btyt67aDtHIeuuZ6RZD9vwFSoIc59pQ8MzhrLcWXp1K7oMoSXe0z9eold1KvCj8gqwIIsJKZs0yrMYb6vhOmmOLfxqIkQdAXrY67wRM9Mg0BdEhOp3YKxGSVFIcQF2HqUEdIC5jWO_n9sdlEgfPFu9bi-J0mosCcNKSY1f1ShqNKdgookzcFXoQQDtY0DOtD-ev5MSeUDG6UYB4XMqLgq3NHdwoDLUOftCCXTBqQVyAq1S0VB3JxEYezTCAzB52e1UTh_slepCRJWYxn9stPA52VjOY-VpUlzYuoV8vzVrKs-jdektuytuw3IU7dgO3QZL5Kd2FlN4cb5M0T-J8n5cxMNiFSRJlCaX7vIjTJIZdfsNvqU9DP_JjGgQxpdvIL0qguwD2sKdJFnmRDyfGq96Pb7jWDdwmwS4JbyysaFs7TKmAM7EPMXeI72_UrS2uzZuDxmMI1-ayG24MN5UtOrZw8McADuJ78jvXaFuOWXEDvQnsBkDvn5TC9nv2plHV7Q_X_FqZtUcf7Jr-PwAA__8nIiB7">