[llvm] [AMDGPU] Verify dominance when rewriting spills to registers (PR #167347)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 10 11:04:50 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Austin Kerbow (kerbowa)

<details>
<summary>Changes</summary>

When performing spill elimination in the AGPR copy rewrite pass, it was possible to encounter spill reloads that were not dominated by any spill store. This caused invalid MIR to be generated, where vreg uses were not dominated by their defs. This patch adds a dominance check before rewriting spills.

---

Patch is 172.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167347.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+62-4) 
- (added) llvm/test/CodeGen/AMDGPU/rewrite-agpr-spill-multi-store-crash.ll (+2084) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 89c16dadb4b41..d2c56589acafd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/VirtRegMap.h"
@@ -58,6 +59,7 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
   LiveIntervals &LIS;
   LiveStacks &LSS;
   const RegisterClassInfo &RegClassInfo;
+  MachineDominatorTree &MDT;
 
   bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
                                   MCPhysReg PrefPhysReg) const;
@@ -66,10 +68,11 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
   AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
                                 LiveRegMatrix &LRM, LiveIntervals &LIS,
                                 LiveStacks &LSS,
-                                const RegisterClassInfo &RegClassInfo)
+                                const RegisterClassInfo &RegClassInfo,
+                                MachineDominatorTree &MDT)
       : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
         TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
-        LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
+        LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo), MDT(MDT) {}
 
   bool isRewriteCandidate(const MachineInstr &MI) const {
     return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
@@ -515,6 +518,56 @@ void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
     if (SpillReferences == SpillSlotReferences.end())
       continue;
 
+    // Verify that each spill restore is dominated by at least one spill save.
+    SmallVector<MachineInstr *, 4> Stores, Loads;
+    Stores.reserve(SpillReferences->second.size());
+    Loads.reserve(SpillReferences->second.size());
+    for (MachineInstr *MI : SpillReferences->second) {
+      if (MI->mayStore())
+        Stores.push_back(MI);
+      else if (MI->mayLoad())
+        Loads.push_back(MI);
+    }
+
+    SmallVector<MachineInstr *, 4> ReachableStores;
+    ReachableStores.reserve(Stores.size());
+    for (MachineInstr *S : Stores)
+      if (MDT.isReachableFromEntry(S->getParent()))
+        ReachableStores.push_back(S);
+
+    if (ReachableStores.empty()) {
+      LLVM_DEBUG(dbgs() << "Skipping " << printReg(Slot, &TRI)
+                        << ": no reachable stores\n");
+      continue;
+    }
+
+    auto IsDominatedByAnyStore = [&](MachineInstr *LoadMI) -> bool {
+      MachineBasicBlock *LoadMBB = LoadMI->getParent();
+      if (!MDT.isReachableFromEntry(LoadMBB))
+        return true;
+      for (MachineInstr *StoreMI : ReachableStores) {
+        MachineBasicBlock *StoreMBB = StoreMI->getParent();
+        if (StoreMBB == LoadMBB) {
+          for (MachineBasicBlock::iterator I = StoreMI->getIterator(),
+                                           E = StoreMBB->end();
+               I != E; ++I)
+            if (&*I == LoadMI)
+              return true;
+          continue;
+        }
+        if (MDT.dominates(StoreMBB, LoadMBB))
+          return true;
+      }
+      return false;
+    };
+
+    if (!llvm::all_of(Loads, IsDominatedByAnyStore)) {
+      LLVM_DEBUG(
+          dbgs() << "Skipping " << printReg(Slot, &TRI)
+                 << ": some reachable load not dominated by any store\n");
+      continue;
+    }
+
     const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
 
     LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
@@ -603,11 +656,13 @@ class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
     AU.addRequired<VirtRegMapWrapperLegacy>();
     AU.addRequired<LiveRegMatrixWrapperLegacy>();
     AU.addRequired<LiveStacksWrapperLegacy>();
+    AU.addRequired<MachineDominatorTreeWrapperPass>();
 
     AU.addPreserved<LiveIntervalsWrapperPass>();
     AU.addPreserved<VirtRegMapWrapperLegacy>();
     AU.addPreserved<LiveRegMatrixWrapperLegacy>();
     AU.addPreserved<LiveStacksWrapperLegacy>();
+    AU.addPreserved<MachineDominatorTreeWrapperPass>();
 
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
@@ -622,6 +677,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
                     "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
 
@@ -641,7 +697,8 @@ bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
   auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
   auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
   auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
-  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
+  auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo, MDT);
   return Impl.run(MF);
 }
 
@@ -652,10 +709,11 @@ AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
   LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
   LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
   LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(MF);
+  MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
   RegisterClassInfo RegClassInfo;
   RegClassInfo.runOnMachineFunction(MF);
 
-  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
+  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo, MDT);
   if (!Impl.run(MF))
     return PreservedAnalyses::all();
   auto PA = getMachineFunctionPassPreservedAnalyses();
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-agpr-spill-multi-store-crash.ll b/llvm/test/CodeGen/AMDGPU/rewrite-agpr-spill-multi-store-crash.ll
new file mode 100644
index 0000000000000..070032e03624a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-agpr-spill-multi-store-crash.ll
@@ -0,0 +1,2084 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -O3 -o - %s | FileCheck %s
+
+; This is a reduced testcase that demonstrates a bug fix in AMDGPURewriteAGPRCopyMFMA's
+; eliminateSpillsOfReassignedVGPRs(). When a spill slot has multiple stores in
+; different blocks that don't dominate all loads, the pass must skip elimination
+; to avoid creating invalid SSA by replacing all references with a single vreg.
+
+; CHECK-LABEL: kernel_crash:
+
+source_filename = "LLVMDialectModule"
+target datalayout = "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target triple = "amdgcn-amd-amdhsa"
+
+ at global_smem = external addrspace(3) global [0 x i8]
+
+define amdgpu_kernel void @kernel_crash(i32 inreg %0, ptr addrspace(1) inreg readonly captures(none) %1, i32 inreg %2, i32 %3, i32 %4, i32 %5, i32 %6, ptr addrspace(3) %7, ptr addrspace(3) %global_smem, i1 %8, i32 %9, i32 %10, i32 %11, i32 %12, <8 x half> %13, <8 x half> %14, <8 x half> %15, <8 x half> %16, <8 x half> %17, <8 x half> %18, <8 x half> %19, <8 x half> %20, <8 x half> %21, <8 x half> %22, <8 x half> %23, <8 x half> %24, <8 x half> %25, <8 x half> %26, float %27, ptr addrspace(3) %28, ptr addrspace(3) %29, ptr addrspace(3) %30, ptr addrspace(3) %31, ptr addrspace(3) %32, ptr addrspace(3) %33, ptr addrspace(3) %34, ptr addrspace(3) %35, ptr addrspace(3) %36, ptr addrspace(3) %37, ptr addrspace(3) %38, i1 %exitcond.not, <2 x float> %39, <2 x float> %40, ptr addrspace(3) %41, ptr addrspace(3) %42, ptr addrspace(3) %43, ptr addrspace(3) %44, ptr addrspace(3) %45, ptr addrspace(3) %46, ptr addrspace(3) %47, ptr addrspace(3) %48, ptr addrspace(3) %49, ptr addrspace(3) %50, ptr addrspace(3) %51, ptr addrspace(3) %52, ptr addrspace(3) %53, ptr addrspace(3) %54, ptr addrspace(3) %55, ptr addrspace(3) %56, ptr addrspace(3) %57, ptr addrspace(3) %58, ptr addrspace(3) %59, ptr addrspace(3) %60, i1 %61, <8 x half> %62, <8 x half> %63, <4 x float> %64, <4 x float> %65, <2 x float> %66, <2 x float> %67, i32 %68, <2 x float> %69, <2 x float> %70, <2 x float> %71, <2 x float> %72, <2 x float> %73, <2 x float> %74) #0 {
+  %76 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %77 = and i32 %76, 127
+  %78 = srem i32 %77, %3
+  %79 = or disjoint i32 %77, 1
+  %80 = srem i32 %77, %2
+  %81 = srem i32 %79, %2
+  %82 = add i32 %80, %4
+  %83 = add i32 %80, %5
+  %84 = add i32 %81, %5
+  %85 = add i32 %81, %6
+  %86 = shl i32 %0, 0
+  %87 = shl nuw nsw i32 %77, 1
+  %88 = getelementptr inbounds nuw i8, ptr addrspace(3) %7, i32 %87
+  store i16 0, ptr addrspace(3) %88, align 2
+  %89 = getelementptr inbounds nuw i8, ptr addrspace(3) %global_smem, i32 %87
+  %90 = xor i32 %87, 1040
+  %91 = getelementptr inbounds nuw i8, ptr addrspace(3) %global_smem, i32 %90
+  %92 = getelementptr inbounds nuw i8, ptr addrspace(3) %91, i32 33024
+  store i16 0, ptr addrspace(3) %92, align 2
+  %93 = getelementptr inbounds nuw i8, ptr addrspace(3) %91, i32 49664
+  %94 = getelementptr inbounds nuw i8, ptr addrspace(3) %91, i32 49920
+  %95 = xor i32 %87, 13520
+  %96 = getelementptr inbounds nuw i8, ptr addrspace(3) %global_smem, i32 %95
+  %97 = getelementptr inbounds nuw i8, ptr addrspace(3) %96, i32 16384
+  store i16 0, ptr addrspace(3) %97, align 2
+  %98 = getelementptr inbounds nuw i8, ptr addrspace(3) %96, i32 16896
+  %99 = getelementptr inbounds nuw i8, ptr addrspace(3) %96, i32 32768
+  br i1 %8, label %.lr.ph, label %.._crit_edge_crit_edge
+
+.._crit_edge_crit_edge:                           ; preds = %75
+  %.pre = and i32 %76, 1
+  br label %._crit_edge
+
+.lr.ph:                                           ; preds = %75
+  %100 = and i32 %76, 152
+  %101 = xor i32 %100, 0
+  %102 = getelementptr inbounds nuw i8, ptr addrspace(3) @global_smem, i32 %9
+  %103 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4160
+  %104 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 32768
+  %105 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 36928
+  %106 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 64
+  %107 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4096
+  %108 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 256
+  %109 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4416
+  %110 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 33024
+  %111 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 37184
+  %112 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 320
+  %113 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4352
+  %114 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 33088
+  %115 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 37120
+  %116 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 512
+  %117 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4672
+  %118 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 33280
+  %119 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 37440
+  %120 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 576
+  %121 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4608
+  %122 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 33344
+  %123 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 37376
+  %124 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 768
+  %125 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4928
+  %126 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 33536
+  %127 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 37696
+  %128 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 832
+  %129 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 4864
+  %130 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 33600
+  %131 = getelementptr inbounds nuw i8, ptr addrspace(3) %102, i32 37632
+  %132 = getelementptr inbounds nuw i8, ptr addrspace(3) @global_smem, i32 %10
+  %133 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 32768
+  %134 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 36928
+  %135 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 64
+  %136 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 4096
+  %137 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 32832
+  %138 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 36864
+  %139 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 33088
+  %140 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 37120
+  %141 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 512
+  %142 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 37440
+  %143 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 576
+  %144 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 4608
+  %145 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 33344
+  %146 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 37376
+  %147 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 768
+  %148 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 4928
+  %149 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 33536
+  %150 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 37696
+  %151 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 832
+  %152 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 4864
+  %153 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 33600
+  %154 = getelementptr inbounds nuw i8, ptr addrspace(3) %132, i32 37632
+  %155 = getelementptr inbounds nuw i8, ptr addrspace(3) getelementptr (i8, ptr addrspace(3) @global_smem, i32 65536), i32 %9
+  %156 = getelementptr inbounds nuw i8, ptr addrspace(3) %155, i32 1088
+  %157 = getelementptr inbounds nuw i8, ptr addrspace(3) %155, i32 8192
+  %158 = getelementptr inbounds nuw i8, ptr addrspace(3) %155, i32 9280
+  %159 = getelementptr inbounds nuw i8, ptr addrspace(3) %155, i32 64
+  %160 = getelementptr inbounds nuw i8, ptr addrspace(3) %155, i32 1024
+  %161 = getelementptr inbounds nuw i8, ptr addrspace(3) %155, i32 8256
+  %162 = getelementptr inbounds nuw i8, ptr addrspace(3) %155, i32 9216
+  %163 = getelementptr inbounds nuw i8, ptr addrspace(3) getelementptr (i8, ptr addrspace(3) @global_smem, i32 65536), i32 %11
+  %164 = getelementptr inbounds nuw i8, ptr addrspace(3) %163, i32 1088
+  %165 = getelementptr inbounds nuw i8, ptr addrspace(3) %163, i32 8192
+  %166 = getelementptr inbounds nuw i8, ptr addrspace(3) %163, i32 9280
+  %167 = getelementptr inbounds nuw i8, ptr addrspace(3) %163, i32 64
+  %168 = getelementptr inbounds nuw i8, ptr addrspace(3) %163, i32 1024
+  %169 = getelementptr inbounds nuw i8, ptr addrspace(3) %163, i32 8256
+  %170 = getelementptr inbounds nuw i8, ptr addrspace(3) %163, i32 9216
+  %171 = getelementptr inbounds nuw i8, ptr addrspace(3) getelementptr (i8, ptr addrspace(3) @global_smem, i32 65536), i32 %10
+  %172 = getelementptr inbounds nuw i8, ptr addrspace(3) %171, i32 1088
+  %173 = getelementptr inbounds nuw i8, ptr addrspace(3) %171, i32 8192
+  %174 = getelementptr inbounds nuw i8, ptr addrspace(3) %171, i32 9280
+  %175 = getelementptr inbounds nuw i8, ptr addrspace(3) %171, i32 64
+  %176 = getelementptr inbounds nuw i8, ptr addrspace(3) %171, i32 1024
+  %177 = getelementptr inbounds nuw i8, ptr addrspace(3) %171, i32 8256
+  %178 = getelementptr inbounds nuw i8, ptr addrspace(3) %171, i32 9216
+  %179 = getelementptr inbounds nuw i8, ptr addrspace(3) getelementptr (i8, ptr addrspace(3) @global_smem, i32 65536), i32 %101
+  %180 = getelementptr inbounds nuw i8, ptr addrspace(3) %179, i32 1088
+  %181 = getelementptr inbounds nuw i8, ptr addrspace(3) %179, i32 8192
+  %182 = getelementptr inbounds nuw i8, ptr addrspace(3) %179, i32 9280
+  %183 = getelementptr inbounds nuw i8, ptr addrspace(3) %179, i32 64
+  %184 = getelementptr inbounds nuw i8, ptr addrspace(3) %179, i32 1024
+  %185 = getelementptr inbounds nuw i8, ptr addrspace(3) %179, i32 8256
+  %186 = getelementptr inbounds nuw i8, ptr addrspace(3) %179, i32 9216
+  br label %187
+
+187:                                              ; preds = %187, %.lr.ph
+  %.pn5751951 = phi i32 [ %78, %.lr.ph ], [ %343, %187 ]
+  %188 = phi float [ 0.000000e+00, %.lr.ph ], [ %560, %187 ]
+  %189 = phi float [ 0.000000e+00, %.lr.ph ], [ %561, %187 ]
+  %190 = phi float [ 0.000000e+00, %.lr.ph ], [ %562, %187 ]
+  %191 = phi float [ 0.000000e+00, %.lr.ph ], [ %569, %187 ]
+  %192 = phi float [ 0.000000e+00, %.lr.ph ], [ %570, %187 ]
+  %193 = phi float [ 0.000000e+00, %.lr.ph ], [ %571, %187 ]
+  %194 = phi float [ 0.000000e+00, %.lr.ph ], [ %572, %187 ]
+  %195 = phi float [ 0.000000e+00, %.lr.ph ], [ %579, %187 ]
+  %196 = phi float [ 0.000000e+00, %.lr.ph ], [ %580, %187 ]
+  %197 = phi float [ 0.000000e+00, %.lr.ph ], [ %581, %187 ]
+  %198 = phi float [ 0.000000e+00, %.lr.ph ], [ %585, %187 ]
+  %199 = phi float [ 0.000000e+00, %.lr.ph ], [ %590, %187 ]
+  %200 = phi float [ 0.000000e+00, %.lr.ph ], [ %591, %187 ]
+  %201 = phi float [ 0.000000e+00, %.lr.ph ], [ %598, %187 ]
+  %202 = phi float [ 0.000000e+00, %.lr.ph ], [ %599, %187 ]
+  %203 = phi float [ 0.000000e+00, %.lr.ph ], [ %600, %187 ]
+  %204 = phi float [ 0.000000e+00, %.lr.ph ], [ %601, %187 ]
+  %205 = phi float [ 0.000000e+00, %.lr.ph ], [ %606, %187 ]
+  %206 = phi float [ 0.000000e+00, %.lr.ph ], [ %607, %187 ]
+  %207 = phi float [ 0.000000e+00, %.lr.ph ], [ %614, %187 ]
+  %208 = phi float [ 0.000000e+00, %.lr.ph ], [ %615, %187 ]
+  %209 = phi float [ 0.000000e+00, %.lr.ph ], [ %616, %187 ]
+  %210 = phi float [ 0.000000e+00, %.lr.ph ], [ %622, %187 ]
+  %211 = phi float [ 0.000000e+00, %.lr.ph ], [ %623, %187 ]
+  %212 = phi float [ 0.000000e+00, %.lr.ph ], [ %630, %187 ]
+  %213 = phi float [ 0.000000e+00, %.lr.ph ], [ %631, %187 ]
+  %214 = phi float [ 0.000000e+00, %.lr.ph ], [ %632, %187 ]
+  %215 = phi float [ 0.000000e+00, %.lr.ph ], [ %633, %187 ]
+  %216 = phi float [ 0.000000e+00, %.lr.ph ], [ %637, %187 ]
+  %217 = phi float [ 0.000000e+00, %.lr.ph ], [ %644, %187 ]
+  %218 = phi float [ 0.000000e+00, %.lr.ph ], [ %645, %187 ]
+  %219 = phi float [ 0.000000e+00, %.lr.ph ], [ %646, %187 ]
+  %220 = phi float [ 0.000000e+00, %.lr.ph ], [ %647, %187 ]
+  %221 = phi float [ 0.000000e+00, %.lr.ph ], [ %654, %187 ]
+  %222 = phi float [ 0.000000e+00, %.lr.ph ], [ %655, %187 ]
+  %223 = phi float [ 0.000000e+00, %.lr.ph ], [ %656, %187 ]
+  %224 = phi float [ 0.000000e+00, %.lr.ph ], [ %657, %187 ]
+  %225 = phi float [ 0.000000e+00, %.lr.ph ], [ %664, %187 ]
+  %226 = phi float [ 0.000000e+00, %.lr.ph ], [ %665, %187 ]
+  %227 = phi float [ 0.000000e+00, %.lr.ph ], [ %666, %187 ]
+  %228 = phi float [ 0.000000e+00, %.lr.ph ], [ %667, %187 ]
+  %229 = phi float [ 0.000000e+00, %.lr.ph ], [ %674, %187 ]
+  %230 = phi float [ 0.000000e+00, %.lr.ph ], [ %675, %187 ]
+  %231 = phi float [ 0.000000e+00, %.lr.ph ], [ %676, %187 ]
+  %232 = phi float [ 0.000000e+00, %.lr.ph ], [ %677, %187 ]
+  %233 = phi float [ 0.000000e+00, %.lr.ph ], [ %684, %187 ]
+  %234 = phi float [ 0.000000e+00, %.lr.ph ], [ %685, %187 ]
+  %235 = phi float [ 0.000000e+00, %.lr.ph ], [ %686, %187 ]
+  %236 = phi float [ 0.000000e+00, %.lr.ph ], [ %691, %187 ]
+  %237 = phi float [ 0.000000e+00, %.lr.ph ], [ %692, %187 ]
+  %238 = phi float [ 0.000000e+00, %.lr.ph ], [ %696, %187 ]
+  %239 = phi float [ 0.000000e+00, %.lr.ph ], [ %703, %187 ]
+  %240 = phi float [ 0.000000e+00, %.lr.ph ], [ %704, %187 ]
+  %241 = phi float [ 0.000000e+00, %.lr.ph ], [ %705, %187 ]
+  %242 = phi float [ 0.000000e+00, %.lr.ph ], [ %706, %187 ]
+  %243 = phi float [ 0.000000e+00, %.lr.ph ], [ %714, %187 ]
+  %244 = phi float [ 0.000000e+00, %.lr.ph ], [ %7...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/167347


More information about the llvm-commits mailing list