[llvm] [CodeGen] Introduce MachineLaneSSAUpdater for SSA Repair in Machine IR (PR #163421)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 15 09:40:18 PDT 2025
https://github.com/alex-t updated https://github.com/llvm/llvm-project/pull/163421
From 412dba31d61d9755893ae8c1a3e28c78f1b62ad2 Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev@amd.com>
Date: Thu, 2 Oct 2025 19:05:52 +0000
Subject: [PATCH 1/7] [CodeGen] Implement MachineLaneSSAUpdater for lane-aware
SSA repair
This patch introduces MachineLaneSSAUpdater, a new utility for performing
SSA repair on Machine IR with full subregister lane awareness. This is
particularly important for AMDGPU and other targets with complex
subregister structures.
Key features:
- Two explicit entry points: addDefAndRepairNewDef() for new definitions
and addDefAndRepairAfterSpill() for reload-after-spill scenarios
- Lane-aware pruned IDF computation using LLVM's IDFCalculatorBase
- Worklist-driven PHI placement algorithm that correctly handles
iterative PHI insertion
- Per-edge lane analysis for complex PHI construction with dual-PHI
support when both old and new register lanes are live
- SpillCutCollector helper for capturing liveness endpoints during
spill operations with proper subrange refinement
The implementation follows standard SSA reconstruction algorithms but
extends them to handle subregister lanes properly. PHI placement uses
pruned iterated dominance frontiers, and the lane analysis ensures
correct PHI operand construction even in complex scenarios where
different lanes come from different predecessors.
The design separates concerns cleanly:
- Entry points handle scenario-specific setup (indexing, interval extension)
- Common performSSARepair() handles PHI placement and use rewriting
- Lane-aware analysis throughout maintains correctness for partial
register operations
This is the foundation for efficient SSA repair in the presence of
complex subregister usage patterns, with rewriteDominatedUses()
implementation to follow in subsequent patches.
---
.../llvm/CodeGen/MachineLaneSSAUpdater.h | 217 +++++
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp | 775 ++++++++++++++++++
3 files changed, 993 insertions(+)
create mode 100644 llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
create mode 100644 llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
diff --git a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
new file mode 100644
index 0000000000000..fb260a1d27bb1
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
@@ -0,0 +1,217 @@
+//===- MachineLaneSSAUpdater.h - SSA repair for Machine IR -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// === MachineLaneSSAUpdater Design Notes ===
+//
+
+#ifndef LLVM_CODEGEN_MACHINELANESSAUPDATER_H
+#define LLVM_CODEGEN_MACHINELANESSAUPDATER_H
+
+#include "llvm/MC/LaneBitmask.h" // LaneBitmask
+#include "llvm/ADT/SmallVector.h" // SmallVector
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Register.h" // Register
+#include "llvm/CodeGen/SlotIndexes.h" // SlotIndex
+#include "llvm/CodeGen/LiveInterval.h" // LiveRange
+
+namespace llvm {
+
+// Forward declarations to avoid heavy includes in the header.
+class MachineFunction;
+class MachineBasicBlock;
+class MachineInstr;
+class LiveIntervals;
+class LiveRange;
+class MachineDominatorTree;
+class TargetRegisterInfo;
+class MachinePostDominatorTree; // forward-declared only; not used in this header
+
+//===----------------------------------------------------------------------===//
+// CutEndPoints: Opaque token representing a spill-time cut of a value.
+// Constructed only by SpillCutCollector and consumed by the updater in
+// addDefAndRepairAfterSpill().
+//===----------------------------------------------------------------------===//
+class CutEndPoints {
+public:
+ CutEndPoints() = delete;
+
+ Register getOrigVReg() const { return OrigVReg; }
+ SlotIndex getCutIdx() const { return CutIdx; }
+ const SmallVector<LaneBitmask, 4> &getTouchedLaneMasks() const { return TouchedLaneMasks; }
+
+ // Access to captured endpoint data for extendToIndices()
+ const SmallVector<SlotIndex, 8> &getMainEndPoints() const { return MainEndPoints; }
+ const DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> &getSubrangeEndPoints() const {
+ return SubrangeEndPoints;
+ }
+
+ // Optional: debugging aids (not required for functionality).
+ const SmallVector<LiveRange::Segment, 4> &getDebugSegsBefore() const { return SegsBefore; }
+
+private:
+ friend class SpillCutCollector; // only the collector can create valid tokens
+
+ // Private constructor used by the collector.
+ CutEndPoints(Register VReg,
+ SlotIndex Cut,
+ SmallVector<LaneBitmask, 4> Lanes,
+ SmallVector<SlotIndex, 8> MainEP,
+ DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> SubEP,
+ SmallVector<LiveRange::Segment, 4> Before)
+ : OrigVReg(VReg), CutIdx(Cut),
+ TouchedLaneMasks(std::move(Lanes)),
+ MainEndPoints(std::move(MainEP)),
+ SubrangeEndPoints(std::move(SubEP)),
+ SegsBefore(std::move(Before)) {}
+
+ Register OrigVReg;
+ SlotIndex CutIdx;
+ SmallVector<LaneBitmask, 4> TouchedLaneMasks; // main + touched subranges
+
+ // Captured endpoint data for extendToIndices()
+ SmallVector<SlotIndex, 8> MainEndPoints;
+ DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> SubrangeEndPoints;
+
+ // Optional diagnostics: segments before pruning (for asserts/debug dumps).
+ SmallVector<LiveRange::Segment, 4> SegsBefore;
+};
+
+//===----------------------------------------------------------------------===//
+// SpillCutCollector: captures EndPoints at spill-time by calling pruneValue()
+// on the main live range and the touched subranges. The opaque CutEndPoints
+// are later consumed by the updater.
+//===----------------------------------------------------------------------===//
+class SpillCutCollector {
+public:
+ explicit SpillCutCollector(LiveIntervals &LIS, MachineRegisterInfo &MRI)
+ : LIS(LIS), MRI(MRI) {}
+
+ // Decide a cut at CutIdx for OrigVReg (lane-aware). This should:
+ // - call pruneValue() on main + subranges as needed,
+ // - stash the returned endpoints needed by extendToIndices(),
+ // - return an opaque token capturing OrigVReg, CutIdx, and masks.
+ CutEndPoints cut(Register OrigVReg, SlotIndex CutIdx, LaneBitmask LanesToCut);
+
+private:
+ LiveIntervals &LIS;
+ MachineRegisterInfo &MRI;
+};
+
+//===----------------------------------------------------------------------===//
+// MachineLaneSSAUpdater: universal SSA repair for Machine IR (lane-aware)
+// * addDefAndRepairNewDef : for plain new defs (no prior pruneValue)
+// * addDefAndRepairAfterSpill: for reloads (must consume CutEndPoints)
+//===----------------------------------------------------------------------===//
+class MachineLaneSSAUpdater {
+public:
+ MachineLaneSSAUpdater(MachineFunction &MF,
+ LiveIntervals &LIS,
+ MachineDominatorTree &MDT,
+ const TargetRegisterInfo &TRI)
+ : MF(MF), LIS(LIS), MDT(MDT), TRI(TRI) {}
+
+ // Plain new-def path (no EndPoints required). The updater derives any
+ // necessary data from the intact LIS.
+ Register addDefAndRepairNewDef(MachineInstr &NewDefMI,
+ Register OrigVReg,
+ LaneBitmask DefMask);
+
+ // Reload-after-spill path (requires spill-time EndPoints). Will assert
+ // if the token does not match the OrigVReg or if indices are inconsistent.
+ Register addDefAndRepairAfterSpill(MachineInstr &ReloadMI,
+ Register OrigVReg,
+ LaneBitmask DefMask,
+ const CutEndPoints &EP);
+
+private:
+ // Common SSA repair logic used by both entry points
+ void performSSARepair(Register NewVReg, Register OrigVReg,
+ LaneBitmask DefMask, MachineBasicBlock *DefBB);
+
+ // Optional knobs (fluent style); no-ops until implemented in .cpp. NOTE(review): these setters are in a private section, so callers cannot use them — move to public.
+ MachineLaneSSAUpdater &setUndefEdgePolicy(bool MaterializeImplicitDef) {
+ UndefEdgeAsImplicitDef = MaterializeImplicitDef; return *this; }
+ MachineLaneSSAUpdater &setVerifyOnExit(bool Enable) {
+ VerifyOnExit = Enable; return *this; }
+
+private:
+ // --- Internal helpers (declarations only; implement in .cpp) ---
+
+ // Index MI in SlotIndexes / LIS maps immediately after insertion.
+ // Returns the SlotIndex assigned to the instruction.
+ SlotIndex indexNewInstr(MachineInstr &MI);
+
+ // Extend the main live range and the specific subranges at MI's index
+ // for the lanes actually used/defined.
+ void extendPreciselyAt(const Register VReg,
+ const SmallVector<LaneBitmask, 4> &LaneMasks,
+ const MachineInstr &AtMI);
+
+ // Compute pruned IDF for a set of definition blocks (usually {block(NewDef)}),
+ // intersected with blocks where OrigVReg lanes specified by DefMask are live-in.
+ void computePrunedIDF(Register OrigVReg,
+ LaneBitmask DefMask,
+ ArrayRef<MachineBasicBlock *> NewDefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &OutIDFBlocks);
+
+ // Insert lane-aware Machine PHIs with iterative worklist processing.
+ // Seeds with InitialVReg definition, computes IDF, places PHIs, repeats until convergence.
+ // Returns all PHI result registers created during the iteration.
+ SmallVector<Register> insertLaneAwarePHI(Register InitialVReg,
+ Register OrigVReg,
+ LaneBitmask DefMask,
+ MachineBasicBlock *InitialDefBB);
+
+ // Helper: Create PHI in a specific block with per-edge lane analysis
+ SmallVector<Register> createPHIInBlock(MachineBasicBlock &JoinMBB,
+ Register OrigVReg,
+ Register NewVReg,
+ LaneBitmask ResultMask);
+
+ // Rewrite dominated uses of OrigVReg to NewSSA according to the
+ // exact/subset/super policy; create REG_SEQUENCE only when needed.
+ void rewriteDominatedUses(Register OrigVReg,
+ Register NewSSA,
+ LaneBitmask MaskToRewrite);
+
+ // --- Data members ---
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ MachineDominatorTree &MDT;
+ const TargetRegisterInfo &TRI;
+
+ bool UndefEdgeAsImplicitDef = true; // policy hook
+ bool VerifyOnExit = true; // run MF.verify()/LI.verify() at end
+};
+
+// DenseMapInfo specialization for LaneBitmask
+template<>
+struct DenseMapInfo<LaneBitmask> {
+ static inline LaneBitmask getEmptyKey() {
+ // FIXME: LaneBitmask's storage is 64-bit; this ~0U-based pattern only sets the low 32 bits and could collide with a valid lane mask — prefer ~0ULL-based sentinels.
+ return LaneBitmask(~0U - 1);
+ }
+
+ static inline LaneBitmask getTombstoneKey() {
+ // Use a different bit pattern for tombstone
+ return LaneBitmask(~0U);
+ }
+
+ static unsigned getHashValue(const LaneBitmask &Val) {
+ return (unsigned)Val.getAsInteger();
+ }
+
+ static bool isEqual(const LaneBitmask &LHS, const LaneBitmask &RHS) {
+ return LHS == RHS;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINELANESSAUPDATER_H
\ No newline at end of file
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f8f9bbba53e43..68a57539fe255 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -148,6 +148,7 @@ add_llvm_component_library(LLVMCodeGen
MachineSizeOpts.cpp
MachineSSAContext.cpp
MachineSSAUpdater.cpp
+ MachineLaneSSAUpdater.cpp
MachineStripDebug.cpp
MachineTraceMetrics.cpp
MachineUniformityAnalysis.cpp
diff --git a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
new file mode 100644
index 0000000000000..14707871623de
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
@@ -0,0 +1,775 @@
+//===- MachineLaneSSAUpdater.cpp - SSA repair for Machine IR (lane-aware) ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineLaneSSAUpdater - a universal SSA repair utility
+// for Machine IR that handles both regular new definitions and reload-after-
+// spill scenarios with full subregister lane awareness.
+//
+// Key features:
+// - Two explicit entry points: addDefAndRepairNewDef and addDefAndRepairAfterSpill
+// - Lane-aware PHI insertion with per-edge masks
+// - Pruned IDF computation (NewDefBlocks ∩ LiveIn(OldVR))
+// - Precise LiveInterval extension using captured EndPoints
+// - REG_SEQUENCE insertion only when necessary
+// - Preservation of undef/dead flags on partial definitions
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLaneSSAUpdater.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "machine-lane-ssa-updater"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SpillCutCollector Implementation
+//===----------------------------------------------------------------------===//
+
+CutEndPoints SpillCutCollector::cut(Register OrigVReg, SlotIndex CutIdx,
+ LaneBitmask LanesToCut) {
+ LLVM_DEBUG(dbgs() << "SpillCutCollector::cut VReg=" << OrigVReg
+ << " at " << CutIdx << " lanes=" << PrintLaneMask(LanesToCut) << "\n");
+
+ assert(OrigVReg.isVirtual() && "Only virtual registers can be cut for spilling");
+
+ LiveInterval &LI = LIS.getInterval(OrigVReg);
+ SmallVector<LaneBitmask, 4> TouchedLanes;
+ SmallVector<LiveRange::Segment, 4> DebugSegsBefore;
+ SmallVector<SlotIndex, 8> MainEndPoints;
+ DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> SubrangeEndPoints;
+
+ // Store debug information before pruning
+ for (const LiveRange::Segment &S : LI.segments) {
+ DebugSegsBefore.push_back(S);
+ }
+
+ // Use MRI to get the accurate full mask for this register class
+ LaneBitmask RegClassFullMask = MRI.getMaxLaneMaskForVReg(OrigVReg);
+ bool HasSubranges = !LI.subranges().empty();
+ bool IsFullRegSpill = (LanesToCut == RegClassFullMask) || (!HasSubranges && MRI.shouldTrackSubRegLiveness(OrigVReg));
+
+ LLVM_DEBUG(dbgs() << " HasSubranges=" << HasSubranges
+ << " RegClassFullMask=" << PrintLaneMask(RegClassFullMask)
+ << " shouldTrackSubRegLiveness=" << MRI.shouldTrackSubRegLiveness(OrigVReg)
+ << " IsFullRegSpill=" << IsFullRegSpill << "\n");
+
+ if (IsFullRegSpill) {
+ // Whole-register spill: prune main range only
+ if (LI.liveAt(CutIdx)) {
+ TouchedLanes.push_back(LanesToCut);
+ LIS.pruneValue(LI, CutIdx, &MainEndPoints);
+ LLVM_DEBUG(dbgs() << " Pruned main range (whole-reg) with " << MainEndPoints.size()
+ << " endpoints\n");
+ }
+ } else {
+ // Partial-lane spill: refine-then-operate on subranges
+ LLVM_DEBUG(dbgs() << " Partial-lane spill: refining subranges for "
+ << PrintLaneMask(LanesToCut) << "\n");
+
+ // Step 1: Collect subranges that need refinement
+ SmallVector<LiveInterval::SubRange *, 4> SubrangesToRefine;
+ SmallVector<LiveInterval::SubRange *, 4> PreciseMatches;
+
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ LaneBitmask Overlap = SR.LaneMask & LanesToCut;
+ if (Overlap.none()) {
+ continue; // No intersection, skip
+ }
+
+ if (Overlap == SR.LaneMask) {
+ // SR is completely contained in LanesToCut
+ PreciseMatches.push_back(&SR);
+ LLVM_DEBUG(dbgs() << " Found " << (SR.LaneMask == LanesToCut ? "precise" : "subset")
+ << " match: " << PrintLaneMask(SR.LaneMask) << "\n");
+ } else {
+ // Partial overlap: need to refine this subrange
+ SubrangesToRefine.push_back(&SR);
+ LLVM_DEBUG(dbgs() << " Need to refine: " << PrintLaneMask(SR.LaneMask)
+ << " (overlap=" << PrintLaneMask(Overlap) << ")\n");
+ }
+ }
+
+ // Step 2: Refine overlapping subranges into disjoint ones
+ for (LiveInterval::SubRange *SR : SubrangesToRefine) {
+ LaneBitmask OrigMask = SR->LaneMask;
+ LaneBitmask SpillMask = OrigMask & LanesToCut;
+ LaneBitmask KeepMask = OrigMask & ~LanesToCut;
+
+ LLVM_DEBUG(dbgs() << " Refining " << PrintLaneMask(OrigMask)
+ << " into Spill=" << PrintLaneMask(SpillMask)
+ << " Keep=" << PrintLaneMask(KeepMask) << "\n");
+
+ // Create new subrange for spilled portion (SpillMask is always non-empty here)
+ LiveInterval::SubRange *SpillSR = LI.createSubRange(LIS.getVNInfoAllocator(), SpillMask);
+ // Copy liveness from original subrange
+ SpillSR->assign(*SR, LIS.getVNInfoAllocator());
+ PreciseMatches.push_back(SpillSR);
+ LLVM_DEBUG(dbgs() << " Created spill subrange: " << PrintLaneMask(SpillMask) << "\n");
+
+ // Update original subrange to keep-only portion (KeepMask is always non-empty here)
+ SR->LaneMask = KeepMask;
+ LLVM_DEBUG(dbgs() << " Updated original to keep: " << PrintLaneMask(KeepMask) << "\n");
+ }
+
+ // Step 3: Prune only the precise matches for LanesToCut
+ for (LiveInterval::SubRange *SR : PreciseMatches) {
+ if (SR->liveAt(CutIdx) && (SR->LaneMask & LanesToCut).any()) {
+ TouchedLanes.push_back(SR->LaneMask);
+ SmallVector<SlotIndex, 8> SubEndPoints;
+ LIS.pruneValue(*SR, CutIdx, &SubEndPoints);
+ SubrangeEndPoints[SR->LaneMask] = std::move(SubEndPoints);
+ LLVM_DEBUG(dbgs() << " Pruned subrange " << PrintLaneMask(SR->LaneMask)
+ << " with " << SubrangeEndPoints[SR->LaneMask].size() << " endpoints\n");
+ }
+ }
+
+ // Note: Do NOT prune main range for partial spills - subranges are authoritative
+ }
+
+ LLVM_DEBUG(dbgs() << " Cut complete: " << TouchedLanes.size()
+ << " touched lane masks\n");
+
+ return CutEndPoints(OrigVReg, CutIdx, std::move(TouchedLanes),
+ std::move(MainEndPoints), std::move(SubrangeEndPoints),
+ std::move(DebugSegsBefore));
+}
+
+//===----------------------------------------------------------------------===//
+// MachineLaneSSAUpdater Implementation
+//===----------------------------------------------------------------------===//
+
+Register MachineLaneSSAUpdater::addDefAndRepairNewDef(MachineInstr &NewDefMI,
+ Register OrigVReg,
+ LaneBitmask DefMask) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::addDefAndRepairNewDef VReg=" << OrigVReg
+ << " DefMask=" << PrintLaneMask(DefMask) << "\n");
+
+ // Step 1: Index the new instruction in SlotIndexes/LIS
+ indexNewInstr(NewDefMI);
+
+ // Step 2: Extract the new SSA register from the definition instruction
+ Register NewSSAVReg = NewDefMI.defs().begin()->getReg();
+ assert(NewSSAVReg.isValid() && NewSSAVReg.isVirtual() &&
+ "NewDefMI should define a valid virtual register");
+
+ // Step 3: Derive necessary data from intact LiveIntervals
+ // The LiveInterval should already exist and be properly computed
+ if (!LIS.hasInterval(NewSSAVReg)) {
+ LIS.createAndComputeVirtRegInterval(NewSSAVReg);
+ }
+
+ // Step 4: Perform common SSA repair (PHI placement + use rewriting)
+ performSSARepair(NewSSAVReg, OrigVReg, DefMask, NewDefMI.getParent());
+
+ LLVM_DEBUG(dbgs() << " New def SSA repair complete, returning " << NewSSAVReg << "\n");
+ return NewSSAVReg;
+}
+
+Register MachineLaneSSAUpdater::addDefAndRepairAfterSpill(MachineInstr &ReloadMI,
+ Register OrigVReg,
+ LaneBitmask DefMask,
+ const CutEndPoints &EP) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::addDefAndRepairAfterSpill VReg=" << OrigVReg
+ << " DefMask=" << PrintLaneMask(DefMask) << "\n");
+
+ // Safety checks as specified in the design
+ assert(EP.getOrigVReg() == OrigVReg &&
+ "CutEndPoints OrigVReg mismatch");
+
+ // Validate that DefMask is a subset of the lanes that were actually spilled
+ // This allows partial reloads (e.g., reload 32-bit subreg from 64-bit spill)
+ LaneBitmask SpilledLanes = LaneBitmask::getNone();
+ for (LaneBitmask TouchedMask : EP.getTouchedLaneMasks()) {
+ SpilledLanes |= TouchedMask;
+ }
+ assert((DefMask & SpilledLanes) == DefMask &&
+ "DefMask must be a subset of the lanes that were spilled");
+
+ LLVM_DEBUG(dbgs() << " DefMask=" << PrintLaneMask(DefMask)
+ << " is subset of SpilledLanes=" << PrintLaneMask(SpilledLanes) << "\n");
+
+ // Step 1: Index the reload instruction and get its SlotIndex
+ SlotIndex ReloadIdx = indexNewInstr(ReloadMI);
+ assert(ReloadIdx >= EP.getCutIdx() &&
+ "Reload index must be >= cut index");
+
+ // Step 2: Extract the new SSA register from the reload instruction
+ // The caller should have already created NewVReg and built ReloadMI with it
+ Register NewSSAVReg = ReloadMI.defs().begin()->getReg();
+ assert(NewSSAVReg.isValid() && NewSSAVReg.isVirtual() &&
+ "ReloadMI should define a valid virtual register");
+
+ // Step 3: Create and extend NewSSAVReg's LiveInterval using captured EndPoints
+ // The endpoints capture where the original register was live after the spill point
+ // We need to reconstruct this liveness for the new SSA register
+ LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewSSAVReg);
+
+ // Extend main live range using the captured endpoints
+ if (!EP.getMainEndPoints().empty()) {
+ LIS.extendToIndices(NewLI, EP.getMainEndPoints());
+ LLVM_DEBUG(dbgs() << " Extended NewSSA main range with " << EP.getMainEndPoints().size()
+ << " endpoints\n");
+ }
+
+ // Extend subranges for lane-aware liveness reconstruction
+ // Create subranges on-demand for each LaneMask that was captured during spill
+ for (const auto &[LaneMask, EndPoints] : EP.getSubrangeEndPoints()) {
+ if (!EndPoints.empty()) {
+ // Always create a new subrange since NewLI.subranges() is initially empty
+ LiveInterval::SubRange *NewSR = NewLI.createSubRange(LIS.getVNInfoAllocator(), LaneMask);
+
+ LIS.extendToIndices(*NewSR, EndPoints);
+ LLVM_DEBUG(dbgs() << " Created and extended NewSSA subrange " << PrintLaneMask(LaneMask)
+ << " with " << EndPoints.size() << " endpoints\n");
+ }
+ }
+
+ // Step 4: Perform common SSA repair (PHI placement + use rewriting)
+ performSSARepair(NewSSAVReg, OrigVReg, DefMask, ReloadMI.getParent());
+
+ LLVM_DEBUG(dbgs() << " SSA repair complete, returning " << NewSSAVReg << "\n");
+ return NewSSAVReg;
+}
+
+//===----------------------------------------------------------------------===//
+// Common SSA Repair Logic
+//===----------------------------------------------------------------------===//
+
+void MachineLaneSSAUpdater::performSSARepair(Register NewVReg, Register OrigVReg,
+ LaneBitmask DefMask, MachineBasicBlock *DefBB) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::performSSARepair NewVReg=" << NewVReg
+ << " OrigVReg=" << OrigVReg << " DefMask=" << PrintLaneMask(DefMask) << "\n");
+
+ // Step 1: Use worklist-driven PHI placement
+ SmallVector<Register> AllPHIVRegs = insertLaneAwarePHI(NewVReg, OrigVReg, DefMask, DefBB);
+
+ // Step 2: Rewrite dominated uses once for each new register
+ rewriteDominatedUses(OrigVReg, NewVReg, DefMask);
+ for (Register PHIVReg : AllPHIVRegs) {
+ rewriteDominatedUses(OrigVReg, PHIVReg, DefMask);
+ }
+
+ // Step 3: Renumber values if needed
+ LiveInterval &NewLI = LIS.getInterval(NewVReg);
+ NewLI.RenumberValues();
+
+ // Also renumber PHI intervals
+ for (Register PHIVReg : AllPHIVRegs) {
+ if (LIS.hasInterval(PHIVReg)) {
+ LiveInterval &PHILI = LIS.getInterval(PHIVReg);
+ PHILI.RenumberValues();
+ }
+ }
+
+ // Also renumber original interval if it was modified
+ LiveInterval &OrigLI = LIS.getInterval(OrigVReg);
+ OrigLI.RenumberValues();
+
+ // Step 4: Verification if enabled
+ if (VerifyOnExit) {
+ LLVM_DEBUG(dbgs() << " Verifying after SSA repair...\n");
+ // TODO: Add verification calls
+ }
+
+ LLVM_DEBUG(dbgs() << " performSSARepair complete\n");
+}
+
+//===----------------------------------------------------------------------===//
+// Internal Helper Methods (Stubs)
+//===----------------------------------------------------------------------===//
+
+SlotIndex MachineLaneSSAUpdater::indexNewInstr(MachineInstr &MI) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::indexNewInstr: " << MI);
+
+ // Register the instruction in SlotIndexes and LiveIntervals
+ // This is typically done automatically when instructions are inserted,
+ // but we need to ensure it's properly indexed
+ SlotIndexes *SI = LIS.getSlotIndexes();
+
+ // Check if instruction is already indexed
+ if (SI->hasIndex(MI)) {
+ SlotIndex Idx = SI->getInstructionIndex(MI);
+ LLVM_DEBUG(dbgs() << " Already indexed at " << Idx << "\n");
+ return Idx;
+ }
+
+ // Insert the instruction in maps - this should be done by the caller
+ // before calling our SSA repair methods, but we can verify
+ LIS.InsertMachineInstrInMaps(MI);
+
+ SlotIndex Idx = SI->getInstructionIndex(MI);
+ LLVM_DEBUG(dbgs() << " Indexed at " << Idx << "\n");
+ return Idx;
+}
+
+void MachineLaneSSAUpdater::extendPreciselyAt(const Register VReg,
+ const SmallVector<LaneBitmask, 4> &LaneMasks,
+ const MachineInstr &AtMI) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::extendPreciselyAt VReg=" << VReg
+ << " at " << LIS.getInstructionIndex(AtMI) << "\n");
+
+ if (!VReg.isVirtual()) {
+ return; // Only handle virtual registers
+ }
+
+ SlotIndex DefIdx = LIS.getInstructionIndex(AtMI).getRegSlot();
+
+ // Create or get the LiveInterval for this register
+ LiveInterval &LI = LIS.getInterval(VReg);
+
+ // Extend the main live range to include the definition point
+ SmallVector<SlotIndex, 2> DefPoint = { DefIdx };
+ LIS.extendToIndices(LI, DefPoint);
+
+ // For each lane mask, ensure appropriate subranges exist and are extended
+ // For now, assume all lanes are valid - we'll refine this later based on register class
+ LaneBitmask RegCoverageMask = MF.getRegInfo().getMaxLaneMaskForVReg(VReg);
+
+ for (LaneBitmask LaneMask : LaneMasks) {
+ if (LaneMask == MF.getRegInfo().getMaxLaneMaskForVReg(VReg) || LaneMask == LaneBitmask::getNone()) {
+ continue; // Main range handles getAll(), skip getNone()
+ }
+
+ // Only process lanes that are valid for this register class
+ LaneBitmask ValidLanes = LaneMask & RegCoverageMask;
+ if (ValidLanes.none()) {
+ continue;
+ }
+
+ // Find or create the appropriate subrange
+ LiveInterval::SubRange *SR = nullptr;
+ for (LiveInterval::SubRange &Sub : LI.subranges()) {
+ if (Sub.LaneMask == ValidLanes) {
+ SR = &Sub;
+ break;
+ }
+ }
+ if (!SR) {
+ SR = LI.createSubRange(LIS.getVNInfoAllocator(), ValidLanes);
+ }
+
+ // Extend this subrange to include the definition point
+ LIS.extendToIndices(*SR, DefPoint);
+
+ LLVM_DEBUG(dbgs() << " Extended subrange " << PrintLaneMask(ValidLanes) << "\n");
+ }
+
+ LLVM_DEBUG(dbgs() << " LiveInterval extension complete\n");
+}
+
+void MachineLaneSSAUpdater::computePrunedIDF(Register OrigVReg,
+ LaneBitmask DefMask,
+ ArrayRef<MachineBasicBlock *> NewDefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &OutIDFBlocks) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::computePrunedIDF VReg=" << OrigVReg
+ << " DefMask=" << PrintLaneMask(DefMask)
+ << " with " << NewDefBlocks.size() << " new def blocks\n");
+
+ // Clear output vector at entry
+ OutIDFBlocks.clear();
+
+ // Early bail-out checks for robustness
+ if (!OrigVReg.isVirtual()) {
+ LLVM_DEBUG(dbgs() << " Skipping non-virtual register\n");
+ return;
+ }
+
+ if (!LIS.hasInterval(OrigVReg)) {
+ LLVM_DEBUG(dbgs() << " OrigVReg not tracked by LiveIntervals, bailing out\n");
+ return;
+ }
+
+ // Get the main LiveInterval for OrigVReg
+ LiveInterval &LI = LIS.getInterval(OrigVReg);
+
+ // Build prune set: blocks where specified lanes (DefMask) are live-in at entry
+ SmallPtrSet<MachineBasicBlock *, 32> LiveIn;
+ for (MachineBasicBlock &BB : MF) {
+ SlotIndex Start = LIS.getMBBStartIdx(&BB);
+
+ // Collect live lanes at block entry
+ LaneBitmask LiveLanes = LaneBitmask::getNone();
+
+ if (DefMask == MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg)) {
+ // For full register (e.g., reload case), check main interval
+ if (LI.liveAt(Start)) {
+ LiveLanes = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg);
+ }
+ } else {
+ // For specific lanes, check subranges
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (S.liveAt(Start)) {
+ LiveLanes |= S.LaneMask;
+ }
+ }
+
+ // If no subranges found but main interval is live,
+ // assume all lanes are covered by the main interval
+ if (LiveLanes == LaneBitmask::getNone() && LI.liveAt(Start)) {
+ LiveLanes = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg);
+ }
+ }
+
+ // Check if any of the requested lanes (DefMask) are live
+ if ((LiveLanes & DefMask).any()) {
+ LiveIn.insert(&BB);
+ }
+ }
+
+ // Seed set: the blocks where new defs exist (e.g., reload or prior PHIs)
+ SmallPtrSet<MachineBasicBlock *, 8> DefBlocks;
+ for (MachineBasicBlock *B : NewDefBlocks) {
+ if (B) { // Robust to null entries
+ DefBlocks.insert(B);
+ }
+ }
+
+ // Early exit if either set is empty
+ if (DefBlocks.empty() || LiveIn.empty()) {
+ LLVM_DEBUG(dbgs() << " DefBlocks=" << DefBlocks.size() << " LiveIn=" << LiveIn.size()
+ << ", early exit\n");
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << " DefBlocks=" << DefBlocks.size() << " LiveIn=" << LiveIn.size() << "\n");
+
+ // Use LLVM's IDFCalculatorBase for MachineBasicBlock with forward dominance
+ using NodeTy = MachineBasicBlock;
+
+ // Access the underlying DomTreeBase from MachineDominatorTree
+ // MachineDominatorTree inherits from DomTreeBase<MachineBasicBlock>
+ DomTreeBase<NodeTy> &DT = MDT;
+
+ // Compute pruned IDF (forward dominance, IsPostDom=false)
+ llvm::IDFCalculatorBase<NodeTy, /*IsPostDom=*/false> IDF(DT);
+ IDF.setDefiningBlocks(DefBlocks);
+ IDF.setLiveInBlocks(LiveIn);
+ IDF.calculate(OutIDFBlocks);
+
+ LLVM_DEBUG(dbgs() << " Computed " << OutIDFBlocks.size() << " IDF blocks\n");
+
+ // Note: We do not place PHIs here; this function only computes candidate
+ // join blocks. The IDFCalculator handles deduplication automatically.
+}
+
+SmallVector<Register> MachineLaneSSAUpdater::insertLaneAwarePHI(Register InitialVReg,
+ Register OrigVReg,
+ LaneBitmask DefMask,
+ MachineBasicBlock *InitialDefBB) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::insertLaneAwarePHI InitialVReg=" << InitialVReg
+ << " OrigVReg=" << OrigVReg << " DefMask=" << PrintLaneMask(DefMask) << "\n");
+
+ // Worklist item: (VReg, DefBB) pairs that need PHI placement
+ struct WorkItem {
+ Register VReg;
+ MachineBasicBlock *DefBB;
+ WorkItem(Register V, MachineBasicBlock *BB) : VReg(V), DefBB(BB) {}
+ };
+
+ SmallVector<Register> AllCreatedPHIs;
+ SmallVector<WorkItem> Worklist;
+ DenseSet<MachineBasicBlock *> ProcessedBlocks; // Avoid duplicate PHIs in same block
+
+ // Seed worklist with initial definition
+ Worklist.emplace_back(InitialVReg, InitialDefBB);
+
+ LLVM_DEBUG(dbgs() << " Starting worklist processing...\n");
+
+ while (!Worklist.empty()) {
+ WorkItem Item = Worklist.pop_back_val();
+
+ LLVM_DEBUG(dbgs() << " Processing VReg=" << Item.VReg
+ << " DefBB=#" << Item.DefBB->getNumber() << "\n");
+
+ // Step 1: Compute pruned IDF for this definition
+ SmallVector<MachineBasicBlock *> DefBlocks = {Item.DefBB};
+ SmallVector<MachineBasicBlock *> IDFBlocks;
+ computePrunedIDF(OrigVReg, DefMask, DefBlocks, IDFBlocks);
+
+ LLVM_DEBUG(dbgs() << " Found " << IDFBlocks.size() << " IDF blocks\n");
+
+ // Step 2: Create PHIs in each IDF block
+ for (MachineBasicBlock *JoinMBB : IDFBlocks) {
+ // Skip if we already processed this join block (avoid duplicate PHIs)
+ if (ProcessedBlocks.contains(JoinMBB)) {
+ LLVM_DEBUG(dbgs() << " Skipping already processed BB#" << JoinMBB->getNumber() << "\n");
+ continue;
+ }
+ ProcessedBlocks.insert(JoinMBB);
+
+ LLVM_DEBUG(dbgs() << " Creating PHI in BB#" << JoinMBB->getNumber() << "\n");
+
+ // Create PHI using the original per-edge analysis logic
+ SmallVector<Register> PHIResults = createPHIInBlock(*JoinMBB, OrigVReg, Item.VReg, DefMask);
+
+ // Add PHI results to worklist for further processing and to result collection
+ for (Register PHIVReg : PHIResults) {
+ if (PHIVReg.isValid()) {
+ Worklist.emplace_back(PHIVReg, JoinMBB);
+ AllCreatedPHIs.push_back(PHIVReg);
+ LLVM_DEBUG(dbgs() << " Created PHI result VReg=" << PHIVReg
+ << ", added to worklist\n");
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " Worklist processing complete. Created "
+ << AllCreatedPHIs.size() << " PHI registers total.\n");
+
+ return AllCreatedPHIs;
+}
+
+// Helper: Create PHI in a specific block (extracted from previous implementation)
+SmallVector<Register> MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
+ Register OrigVReg,
+ Register NewVReg,
+ LaneBitmask ResultMask) {
+ LLVM_DEBUG(dbgs() << " createPHIInBlock in BB#" << JoinMBB.getNumber()
+ << " OrigVReg=" << OrigVReg << " NewVReg=" << NewVReg
+ << " ResultMask=" << PrintLaneMask(ResultMask) << "\n");
+
+ // Get the LiveIntervals for both old and new registers
+ LiveInterval &OldLI = LIS.getInterval(OrigVReg);
+ LiveInterval &NewLI = LIS.getInterval(NewVReg);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const LaneBitmask FullMask = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg);
+ const LaneBitmask NonReloadedMask = FullMask & ~ResultMask;
+ const unsigned NoRegister = 0; // Target-independent equivalent to AMDGPU::NoRegister
+
+ // Analyze each predecessor edge to determine operand sources
+ SmallVector<MachineOperand> NewVRegOps; // Operands for NewVReg PHI
+ SmallVector<MachineOperand> OldVRegOps; // Operands for OrigVReg PHI
+
+ LaneBitmask NewCommonMask = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg); // intersection across preds
+ LaneBitmask NewUnionMask = LaneBitmask::getNone(); // union across preds
+ LaneBitmask OldCommonMask = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg);
+ LaneBitmask OldUnionMask = LaneBitmask::getNone();
+
+ LLVM_DEBUG(dbgs() << " Analyzing " << JoinMBB.pred_size() << " predecessors:\n");
+
+ for (MachineBasicBlock *PredMBB : JoinMBB.predecessors()) {
+ SlotIndex PredEndIdx = LIS.getMBBEndIdx(PredMBB);
+
+ // Analyze NewVReg lanes on this edge
+ LaneBitmask NewEdgeMask = LaneBitmask::getNone();
+ if (VNInfo *NewVN = NewLI.getVNInfoBefore(PredEndIdx)) {
+ // Check which lanes of NewVReg are live-out via subrange analysis
+ for (const LiveInterval::SubRange &SR : NewLI.subranges()) {
+ if (SR.getVNInfoBefore(PredEndIdx)) {
+ NewEdgeMask |= SR.LaneMask;
+ }
+ }
+
+ // If no subranges but main range is live, assume all reloaded lanes
+ if (NewEdgeMask.none()) {
+ NewEdgeMask = ResultMask;
+ }
+
+ // NewVReg can only contribute reloaded lanes
+ NewEdgeMask &= ResultMask;
+
+ LLVM_DEBUG(dbgs() << " Pred BB#" << PredMBB->getNumber()
+ << " NewVReg lanes: " << PrintLaneMask(NewEdgeMask)
+ << " (VN=" << NewVN->id << ")\n");
+ }
+
+ // Analyze OrigVReg lanes on this edge
+ LaneBitmask OldEdgeMask = LaneBitmask::getNone();
+ if (VNInfo *OldVN = OldLI.getVNInfoBefore(PredEndIdx)) {
+ // Check which lanes of OrigVReg are live-out via subrange analysis
+ for (const LiveInterval::SubRange &SR : OldLI.subranges()) {
+ if (SR.getVNInfoBefore(PredEndIdx)) {
+ OldEdgeMask |= SR.LaneMask;
+ }
+ }
+
+ // If no subranges but main range is live, assume all non-reloaded lanes
+ if (OldEdgeMask.none()) {
+ OldEdgeMask = NonReloadedMask;
+ }
+
+ // OrigVReg can only contribute non-reloaded lanes
+ OldEdgeMask &= NonReloadedMask;
+
+ LLVM_DEBUG(dbgs() << " Pred BB#" << PredMBB->getNumber()
+ << " OrigVReg lanes: " << PrintLaneMask(OldEdgeMask)
+ << " (VN=" << OldVN->id << ")\n");
+ }
+
+ // Update mask statistics
+ NewCommonMask &= NewEdgeMask;
+ NewUnionMask |= NewEdgeMask;
+ OldCommonMask &= OldEdgeMask;
+ OldUnionMask |= OldEdgeMask;
+
+ // Create operands for NewVReg PHI if this edge contributes
+ if (NewEdgeMask.any()) {
+ unsigned SubIdx = NoRegister;
+ if ((ResultMask & ~NewEdgeMask).any()) { // partial register incoming
+ // TODO: Implement getSubRegIndexForLaneMask or equivalent
+ // SubIdx = getSubRegIndexForLaneMask(NewEdgeMask, &TRI);
+ }
+
+ NewVRegOps.push_back(MachineOperand::CreateReg(NewVReg, /*isDef*/ false,
+ /*isImp*/ false, /*isKill*/ false,
+ /*isDead*/ false, /*isUndef*/ false,
+ /*isEarlyClobber*/ false, SubIdx));
+ NewVRegOps.push_back(MachineOperand::CreateMBB(PredMBB));
+ }
+
+ // Create operands for OrigVReg PHI if this edge contributes
+ if (OldEdgeMask.any()) {
+ unsigned SubIdx = NoRegister;
+ if ((NonReloadedMask & ~OldEdgeMask).any()) { // partial register incoming
+ // TODO: Implement getSubRegIndexForLaneMask or equivalent
+ // SubIdx = getSubRegIndexForLaneMask(OldEdgeMask, &TRI);
+ }
+
+ OldVRegOps.push_back(MachineOperand::CreateReg(OrigVReg, /*isDef*/ false,
+ /*isImp*/ false, /*isKill*/ false,
+ /*isDead*/ false, /*isUndef*/ false,
+ /*isEarlyClobber*/ false, SubIdx));
+ OldVRegOps.push_back(MachineOperand::CreateMBB(PredMBB));
+ }
+ }
+
+ // Decide PHI mask strategies using CommonMask/UnionMask logic
+ LaneBitmask NewPhiMask = (NewCommonMask.none() ? NewUnionMask : NewCommonMask);
+ LaneBitmask OldPhiMask = (OldCommonMask.none() ? OldUnionMask : OldCommonMask);
+
+ if (NewPhiMask.none()) NewPhiMask = ResultMask;
+ if (OldPhiMask.none()) OldPhiMask = NonReloadedMask;
+
+ LLVM_DEBUG(dbgs() << " Analysis: NewPhiMask=" << PrintLaneMask(NewPhiMask)
+ << " OldPhiMask=" << PrintLaneMask(OldPhiMask) << "\n");
+
+ SmallVector<Register> ResultVRegs;
+
+ // Create PHI(s) based on what we need
+ if (NewUnionMask.any() && OldUnionMask.any()) {
+ LLVM_DEBUG(dbgs() << " Complex case: Creating separate PHIs for NewVReg and OrigVReg\n");
+
+ // Create PHI for NewVReg lanes
+ if (!NewVRegOps.empty()) {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(NewVReg);
+ Register NewPHIVReg = MF.getRegInfo().createVirtualRegister(RC);
+
+ auto NewPHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewPHIVReg);
+ for (const MachineOperand &Op : NewVRegOps) {
+ NewPHINode.add(Op);
+ }
+
+ MachineInstr *NewPHI = NewPHINode.getInstr();
+ LIS.InsertMachineInstrInMaps(*NewPHI);
+ LIS.createAndComputeVirtRegInterval(NewPHIVReg);
+
+ ResultVRegs.push_back(NewPHIVReg);
+ LLVM_DEBUG(dbgs() << " Created NewVReg PHI: ");
+ LLVM_DEBUG(NewPHI->print(dbgs()));
+ }
+
+ // Create PHI for OrigVReg lanes
+ if (!OldVRegOps.empty()) {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigVReg);
+ Register OldPHIVReg = MF.getRegInfo().createVirtualRegister(RC);
+
+ auto OldPHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), OldPHIVReg);
+ for (const MachineOperand &Op : OldVRegOps) {
+ OldPHINode.add(Op);
+ }
+
+ MachineInstr *OldPHI = OldPHINode.getInstr();
+ LIS.InsertMachineInstrInMaps(*OldPHI);
+ LIS.createAndComputeVirtRegInterval(OldPHIVReg);
+
+ ResultVRegs.push_back(OldPHIVReg);
+ LLVM_DEBUG(dbgs() << " Created OrigVReg PHI: ");
+ LLVM_DEBUG(OldPHI->print(dbgs()));
+ }
+
+ } else if (NewUnionMask.any()) {
+ LLVM_DEBUG(dbgs() << " Simple case: Creating PHI for NewVReg lanes only\n");
+
+ // Create result register and PHI for NewVReg
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(NewVReg);
+ Register PHIVReg = MF.getRegInfo().createVirtualRegister(RC);
+
+ auto PHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (const MachineOperand &Op : NewVRegOps) {
+ PHINode.add(Op);
+ }
+
+ MachineInstr *PHI = PHINode.getInstr();
+ LIS.InsertMachineInstrInMaps(*PHI);
+ LIS.createAndComputeVirtRegInterval(PHIVReg);
+
+ ResultVRegs.push_back(PHIVReg);
+ LLVM_DEBUG(dbgs() << " Created NewVReg PHI: ");
+ LLVM_DEBUG(PHI->print(dbgs()));
+
+ } else if (OldUnionMask.any()) {
+ LLVM_DEBUG(dbgs() << " Simple case: Creating PHI for OrigVReg lanes only\n");
+
+ // Create result register and PHI for OrigVReg
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigVReg);
+ Register PHIVReg = MF.getRegInfo().createVirtualRegister(RC);
+
+ auto PHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (const MachineOperand &Op : OldVRegOps) {
+ PHINode.add(Op);
+ }
+
+ MachineInstr *PHI = PHINode.getInstr();
+ LIS.InsertMachineInstrInMaps(*PHI);
+ LIS.createAndComputeVirtRegInterval(PHIVReg);
+
+ ResultVRegs.push_back(PHIVReg);
+ LLVM_DEBUG(dbgs() << " Created OrigVReg PHI: ");
+ LLVM_DEBUG(PHI->print(dbgs()));
+
+ } else {
+ LLVM_DEBUG(dbgs() << " No lanes live-out from any predecessor - unusual case\n");
+ }
+
+ return ResultVRegs;
+}
+
+void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
+ Register NewSSA,
+ LaneBitmask MaskToRewrite) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::rewriteDominatedUses OrigVReg=" << OrigVReg
+ << " NewSSA=" << NewSSA << " Mask=" << PrintLaneMask(MaskToRewrite) << "\n");
+
+ // TODO: Implement dominated use rewriting
+ // This should handle exact/subset/super policy:
+ // - Exact match: direct replacement
+ // - Subset: create REG_SEQUENCE combining old + new
+ // - Super: extract subregister from new def
+ // Preserve undef/dead flags, never mass-clear on partial defs
+}
\ No newline at end of file
>From cde5ec8cf6f2855c1a0a6767a0e983776f0a2b2c Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Mon, 6 Oct 2025 17:44:31 +0000
Subject: [PATCH 2/7] [CodeGen] Refactor and implement MachineLaneSSAUpdater
for lane-aware SSA repair
This commit implements a comprehensive lane-aware SSA repair utility for
Machine IR with the following key changes:
- Simplify PHI creation logic: Replace complex per-edge lane analysis with
simplified single PHI creation for reload scenarios
- Add utility function getSubRegIndexForLaneMask() for lane-to-subregister
mapping with proper inline implementation
- Implement complete dominated use rewriting with three-case policy:
* Exact match: direct register replacement
* Super/Mixed: REG_SEQUENCE construction for multi-lane uses
* Subset: preserve subregister indices for partial uses
- Add comprehensive internal helper methods:
* incomingOnEdge() for PHI operand VNInfo analysis
* reachedByThisVNI() for dominance and same-block ordering
* operandLaneMask() for lane mask calculation
* buildRSForSuperUse() for REG_SEQUENCE construction
* extendAt() for precise LiveInterval extension
- Refactor header organization: move from forward declarations to inline
implementations where appropriate and add missing includes
The implementation focuses on correctness for spill-reload scenarios while
maintaining lane-level precision for subregister operations.
---
.../llvm/CodeGen/MachineLaneSSAUpdater.h | 43 +-
llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp | 456 ++++++++++--------
2 files changed, 292 insertions(+), 207 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
index fb260a1d27bb1..b606124d4167e 100644
--- a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/Register.h" // Register
#include "llvm/CodeGen/SlotIndexes.h" // SlotIndex
#include "llvm/CodeGen/LiveInterval.h" // LiveRange
+#include "llvm/CodeGen/TargetRegisterInfo.h" // For inline function
namespace llvm {
@@ -29,7 +30,6 @@ class MachineInstr;
class LiveIntervals;
class LiveRange;
class MachineDominatorTree;
-class TargetRegisterInfo;
class MachinePostDominatorTree; // optional if you choose to use it
//===----------------------------------------------------------------------===//
@@ -140,8 +140,7 @@ class MachineLaneSSAUpdater {
MachineLaneSSAUpdater &setVerifyOnExit(bool Enable) {
VerifyOnExit = Enable; return *this; }
-private:
- // --- Internal helpers (declarations only; implement in .cpp) ---
+ // --- Internal helpers ---
// Index MI in SlotIndexes / LIS maps immediately after insertion.
// Returns the SlotIndex assigned to the instruction.
@@ -169,10 +168,9 @@ class MachineLaneSSAUpdater {
MachineBasicBlock *InitialDefBB);
// Helper: Create PHI in a specific block with per-edge lane analysis
- SmallVector<Register> createPHIInBlock(MachineBasicBlock &JoinMBB,
- Register OrigVReg,
- Register NewVReg,
- LaneBitmask ResultMask);
+ Register createPHIInBlock(MachineBasicBlock &JoinMBB,
+ Register OrigVReg,
+ Register NewVReg);
// Rewrite dominated uses of OrigVReg to NewSSA according to the
// exact/subset/super policy; create REG_SEQUENCE only when needed.
@@ -180,6 +178,17 @@ class MachineLaneSSAUpdater {
Register NewSSA,
LaneBitmask MaskToRewrite);
+ // Internal helper methods for use rewriting
+ VNInfo *incomingOnEdge(LiveInterval &LI, MachineInstr *Phi, MachineOperand &PhiOp);
+ bool reachedByThisVNI(LiveInterval &LI, MachineInstr *DefMI, MachineInstr *UseMI,
+ MachineOperand &UseOp, VNInfo *VNI);
+ LaneBitmask operandLaneMask(const MachineOperand &MO);
+ Register buildRSForSuperUse(MachineInstr *UseMI, MachineOperand &MO,
+ Register OldVR, Register NewVR, LaneBitmask MaskToRewrite,
+ LiveInterval &LI, const TargetRegisterClass *OpRC,
+ SlotIndex &OutIdx, SmallVectorImpl<LaneBitmask> &LanesToExtend);
+ void extendAt(LiveInterval &LI, SlotIndex Idx, ArrayRef<LaneBitmask> Lanes);
+
// --- Data members ---
MachineFunction &MF;
LiveIntervals &LIS;
@@ -190,6 +199,26 @@ class MachineLaneSSAUpdater {
bool VerifyOnExit = true; // run MF.verify()/LI.verify() at end
};
+/// Get the subregister index that corresponds to the given lane mask.
+/// \param Mask The lane mask to convert to a subregister index
+/// \param TRI The target register info (provides target-specific subregister mapping)
+/// \return The subregister index, or 0 if no single subregister matches
+inline unsigned getSubRegIndexForLaneMask(LaneBitmask Mask, const TargetRegisterInfo *TRI) {
+ if (Mask.none())
+ return 0; // No subregister
+
+ // Iterate through all subregister indices to find a match
+ for (unsigned SubIdx = 1; SubIdx < TRI->getNumSubRegIndices(); ++SubIdx) {
+ LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubIdx);
+ if (SubMask == Mask) {
+ return SubIdx;
+ }
+ }
+
+ // No exact match found - this might be a composite mask requiring REG_SEQUENCE
+ return 0;
+}
+
// DenseMapInfo specialization for LaneBitmask
template<>
struct DenseMapInfo<LaneBitmask> {
diff --git a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
index 14707871623de..4f0e0b5bfca27 100644
--- a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
@@ -520,16 +520,14 @@ SmallVector<Register> MachineLaneSSAUpdater::insertLaneAwarePHI(Register Initial
LLVM_DEBUG(dbgs() << " Creating PHI in BB#" << JoinMBB->getNumber() << "\n");
// Create PHI using the original per-edge analysis logic
- SmallVector<Register> PHIResults = createPHIInBlock(*JoinMBB, OrigVReg, Item.VReg, DefMask);
+ Register PHIResult = createPHIInBlock(*JoinMBB, OrigVReg, Item.VReg);
- // Add PHI results to worklist for further processing and to result collection
- for (Register PHIVReg : PHIResults) {
- if (PHIVReg.isValid()) {
- Worklist.emplace_back(PHIVReg, JoinMBB);
- AllCreatedPHIs.push_back(PHIVReg);
- LLVM_DEBUG(dbgs() << " Created PHI result VReg=" << PHIVReg
- << ", added to worklist\n");
- }
+ // Add PHI result to worklist for further processing and to result collection
+ if (PHIResult.isValid()) {
+ Worklist.emplace_back(PHIResult, JoinMBB);
+ AllCreatedPHIs.push_back(PHIResult);
+ LLVM_DEBUG(dbgs() << " Created PHI result VReg=" << PHIResult
+ << ", added to worklist\n");
}
}
}
@@ -540,187 +538,67 @@ SmallVector<Register> MachineLaneSSAUpdater::insertLaneAwarePHI(Register Initial
return AllCreatedPHIs;
}
-// Helper: Create PHI in a specific block (extracted from previous implementation)
-SmallVector<Register> MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
- Register OrigVReg,
- Register NewVReg,
- LaneBitmask ResultMask) {
+// Helper: Create lane-specific PHI in a join block
+Register MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
+ Register OrigVReg,
+ Register NewVReg) {
LLVM_DEBUG(dbgs() << " createPHIInBlock in BB#" << JoinMBB.getNumber()
- << " OrigVReg=" << OrigVReg << " NewVReg=" << NewVReg
- << " ResultMask=" << PrintLaneMask(ResultMask) << "\n");
-
- // Get the LiveIntervals for both old and new registers
- LiveInterval &OldLI = LIS.getInterval(OrigVReg);
- LiveInterval &NewLI = LIS.getInterval(NewVReg);
+ << " OrigVReg=" << OrigVReg << " NewVReg=" << NewVReg << "\n");
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const LaneBitmask FullMask = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg);
- const LaneBitmask NonReloadedMask = FullMask & ~ResultMask;
- const unsigned NoRegister = 0; // Target-independent equivalent to AMDGPU::NoRegister
- // Analyze each predecessor edge to determine operand sources
- SmallVector<MachineOperand> NewVRegOps; // Operands for NewVReg PHI
- SmallVector<MachineOperand> OldVRegOps; // Operands for OrigVReg PHI
+ // Derive DefMask from NewVReg's register class (matches reload size)
+ const LaneBitmask ReloadMask = MF.getRegInfo().getMaxLaneMaskForVReg(NewVReg);
+ const bool IsPartialReload = (FullMask & ~ReloadMask).any();
- LaneBitmask NewCommonMask = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg); // intersection across preds
- LaneBitmask NewUnionMask = LaneBitmask::getNone(); // union across preds
- LaneBitmask OldCommonMask = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg);
- LaneBitmask OldUnionMask = LaneBitmask::getNone();
+ // Collect PHI operands for the specific reload lanes
+ SmallVector<MachineOperand> PHIOperands;
+ LiveInterval &NewLI = LIS.getInterval(NewVReg);
- LLVM_DEBUG(dbgs() << " Analyzing " << JoinMBB.pred_size() << " predecessors:\n");
+ LLVM_DEBUG(dbgs() << " Creating PHI for " << (IsPartialReload ? "partial reload" : "full reload")
+ << " ReloadMask=" << PrintLaneMask(ReloadMask) << "\n");
for (MachineBasicBlock *PredMBB : JoinMBB.predecessors()) {
- SlotIndex PredEndIdx = LIS.getMBBEndIdx(PredMBB);
+ // Check if NewVReg (reloaded register) is live-out from this predecessor
+ bool NewVRegLive = LIS.isLiveOutOfMBB(NewLI, PredMBB);
- // Analyze NewVReg lanes on this edge
- LaneBitmask NewEdgeMask = LaneBitmask::getNone();
- if (VNInfo *NewVN = NewLI.getVNInfoBefore(PredEndIdx)) {
- // Check which lanes of NewVReg are live-out via subrange analysis
- for (const LiveInterval::SubRange &SR : NewLI.subranges()) {
- if (SR.getVNInfoBefore(PredEndIdx)) {
- NewEdgeMask |= SR.LaneMask;
- }
- }
-
- // If no subranges but main range is live, assume all reloaded lanes
- if (NewEdgeMask.none()) {
- NewEdgeMask = ResultMask;
- }
-
- // NewVReg can only contribute reloaded lanes
- NewEdgeMask &= ResultMask;
-
+ if (NewVRegLive) {
+ // This is the reload path - use NewVReg (always full register for its class)
LLVM_DEBUG(dbgs() << " Pred BB#" << PredMBB->getNumber()
- << " NewVReg lanes: " << PrintLaneMask(NewEdgeMask)
- << " (VN=" << NewVN->id << ")\n");
- }
-
- // Analyze OrigVReg lanes on this edge
- LaneBitmask OldEdgeMask = LaneBitmask::getNone();
- if (VNInfo *OldVN = OldLI.getVNInfoBefore(PredEndIdx)) {
- // Check which lanes of OrigVReg are live-out via subrange analysis
- for (const LiveInterval::SubRange &SR : OldLI.subranges()) {
- if (SR.getVNInfoBefore(PredEndIdx)) {
- OldEdgeMask |= SR.LaneMask;
- }
- }
-
- // If no subranges but main range is live, assume all non-reloaded lanes
- if (OldEdgeMask.none()) {
- OldEdgeMask = NonReloadedMask;
- }
+ << " contributes NewVReg (reload path)\n");
- // OrigVReg can only contribute non-reloaded lanes
- OldEdgeMask &= NonReloadedMask;
+ PHIOperands.push_back(MachineOperand::CreateReg(NewVReg, /*isDef*/ false));
+ PHIOperands.push_back(MachineOperand::CreateMBB(PredMBB));
+ } else {
+ // This is the original path - use OrigVReg with appropriate subregister
LLVM_DEBUG(dbgs() << " Pred BB#" << PredMBB->getNumber()
- << " OrigVReg lanes: " << PrintLaneMask(OldEdgeMask)
- << " (VN=" << OldVN->id << ")\n");
- }
-
- // Update mask statistics
- NewCommonMask &= NewEdgeMask;
- NewUnionMask |= NewEdgeMask;
- OldCommonMask &= OldEdgeMask;
- OldUnionMask |= OldEdgeMask;
-
- // Create operands for NewVReg PHI if this edge contributes
- if (NewEdgeMask.any()) {
- unsigned SubIdx = NoRegister;
- if ((ResultMask & ~NewEdgeMask).any()) { // partial register incoming
- // TODO: Implement getSubRegIndexForLaneMask or equivalent
- // SubIdx = getSubRegIndexForLaneMask(NewEdgeMask, &TRI);
- }
+ << " contributes OrigVReg (original path)\n");
- NewVRegOps.push_back(MachineOperand::CreateReg(NewVReg, /*isDef*/ false,
- /*isImp*/ false, /*isKill*/ false,
- /*isDead*/ false, /*isUndef*/ false,
- /*isEarlyClobber*/ false, SubIdx));
- NewVRegOps.push_back(MachineOperand::CreateMBB(PredMBB));
- }
-
- // Create operands for OrigVReg PHI if this edge contributes
- if (OldEdgeMask.any()) {
- unsigned SubIdx = NoRegister;
- if ((NonReloadedMask & ~OldEdgeMask).any()) { // partial register incoming
- // TODO: Implement getSubRegIndexForLaneMask or equivalent
- // SubIdx = getSubRegIndexForLaneMask(OldEdgeMask, &TRI);
+ if (IsPartialReload) {
+ // Partial case: z = PHI(y, BB1, x.sub0, BB0)
+ unsigned SubIdx = getSubRegIndexForLaneMask(ReloadMask, &TRI);
+ PHIOperands.push_back(MachineOperand::CreateReg(OrigVReg, /*isDef*/ false,
+ /*isImp*/ false, /*isKill*/ false,
+ /*isDead*/ false, /*isUndef*/ false,
+ /*isEarlyClobber*/ false, SubIdx));
+ } else {
+ // Full register case: z = PHI(y, BB1, x, BB0)
+ PHIOperands.push_back(MachineOperand::CreateReg(OrigVReg, /*isDef*/ false));
}
-
- OldVRegOps.push_back(MachineOperand::CreateReg(OrigVReg, /*isDef*/ false,
- /*isImp*/ false, /*isKill*/ false,
- /*isDead*/ false, /*isUndef*/ false,
- /*isEarlyClobber*/ false, SubIdx));
- OldVRegOps.push_back(MachineOperand::CreateMBB(PredMBB));
+ PHIOperands.push_back(MachineOperand::CreateMBB(PredMBB));
}
}
- // Decide PHI mask strategies using CommonMask/UnionMask logic
- LaneBitmask NewPhiMask = (NewCommonMask.none() ? NewUnionMask : NewCommonMask);
- LaneBitmask OldPhiMask = (OldCommonMask.none() ? OldUnionMask : OldCommonMask);
-
- if (NewPhiMask.none()) NewPhiMask = ResultMask;
- if (OldPhiMask.none()) OldPhiMask = NonReloadedMask;
-
- LLVM_DEBUG(dbgs() << " Analysis: NewPhiMask=" << PrintLaneMask(NewPhiMask)
- << " OldPhiMask=" << PrintLaneMask(OldPhiMask) << "\n");
-
- SmallVector<Register> ResultVRegs;
-
- // Create PHI(s) based on what we need
- if (NewUnionMask.any() && OldUnionMask.any()) {
- LLVM_DEBUG(dbgs() << " Complex case: Creating separate PHIs for NewVReg and OrigVReg\n");
-
- // Create PHI for NewVReg lanes
- if (!NewVRegOps.empty()) {
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(NewVReg);
- Register NewPHIVReg = MF.getRegInfo().createVirtualRegister(RC);
-
- auto NewPHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
- TII->get(TargetOpcode::PHI), NewPHIVReg);
- for (const MachineOperand &Op : NewVRegOps) {
- NewPHINode.add(Op);
- }
-
- MachineInstr *NewPHI = NewPHINode.getInstr();
- LIS.InsertMachineInstrInMaps(*NewPHI);
- LIS.createAndComputeVirtRegInterval(NewPHIVReg);
-
- ResultVRegs.push_back(NewPHIVReg);
- LLVM_DEBUG(dbgs() << " Created NewVReg PHI: ");
- LLVM_DEBUG(NewPHI->print(dbgs()));
- }
-
- // Create PHI for OrigVReg lanes
- if (!OldVRegOps.empty()) {
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigVReg);
- Register OldPHIVReg = MF.getRegInfo().createVirtualRegister(RC);
-
- auto OldPHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
- TII->get(TargetOpcode::PHI), OldPHIVReg);
- for (const MachineOperand &Op : OldVRegOps) {
- OldPHINode.add(Op);
- }
-
- MachineInstr *OldPHI = OldPHINode.getInstr();
- LIS.InsertMachineInstrInMaps(*OldPHI);
- LIS.createAndComputeVirtRegInterval(OldPHIVReg);
-
- ResultVRegs.push_back(OldPHIVReg);
- LLVM_DEBUG(dbgs() << " Created OrigVReg PHI: ");
- LLVM_DEBUG(OldPHI->print(dbgs()));
- }
-
- } else if (NewUnionMask.any()) {
- LLVM_DEBUG(dbgs() << " Simple case: Creating PHI for NewVReg lanes only\n");
-
- // Create result register and PHI for NewVReg
+ // Create the single lane-specific PHI
+ if (!PHIOperands.empty()) {
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(NewVReg);
Register PHIVReg = MF.getRegInfo().createVirtualRegister(RC);
auto PHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
TII->get(TargetOpcode::PHI), PHIVReg);
- for (const MachineOperand &Op : NewVRegOps) {
+ for (const MachineOperand &Op : PHIOperands) {
PHINode.add(Op);
}
@@ -728,36 +606,12 @@ SmallVector<Register> MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock
LIS.InsertMachineInstrInMaps(*PHI);
LIS.createAndComputeVirtRegInterval(PHIVReg);
- ResultVRegs.push_back(PHIVReg);
- LLVM_DEBUG(dbgs() << " Created NewVReg PHI: ");
+ LLVM_DEBUG(dbgs() << " Created lane-specific PHI: ");
LLVM_DEBUG(PHI->print(dbgs()));
-
- } else if (OldUnionMask.any()) {
- LLVM_DEBUG(dbgs() << " Simple case: Creating PHI for OrigVReg lanes only\n");
-
- // Create result register and PHI for OrigVReg
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigVReg);
- Register PHIVReg = MF.getRegInfo().createVirtualRegister(RC);
-
- auto PHINode = BuildMI(JoinMBB, JoinMBB.begin(), DebugLoc(),
- TII->get(TargetOpcode::PHI), PHIVReg);
- for (const MachineOperand &Op : OldVRegOps) {
- PHINode.add(Op);
- }
-
- MachineInstr *PHI = PHINode.getInstr();
- LIS.InsertMachineInstrInMaps(*PHI);
- LIS.createAndComputeVirtRegInterval(PHIVReg);
-
- ResultVRegs.push_back(PHIVReg);
- LLVM_DEBUG(dbgs() << " Created OrigVReg PHI: ");
- LLVM_DEBUG(PHI->print(dbgs()));
-
- } else {
- LLVM_DEBUG(dbgs() << " No lanes live-out from any predecessor - unusual case\n");
+ return PHIVReg;
}
- return ResultVRegs;
+ return Register();
}
void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
@@ -766,10 +620,212 @@ void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::rewriteDominatedUses OrigVReg=" << OrigVReg
<< " NewSSA=" << NewSSA << " Mask=" << PrintLaneMask(MaskToRewrite) << "\n");
- // TODO: Implement dominated use rewriting
- // This should handle exact/subset/super policy:
- // - Exact match: direct replacement
- // - Subset: create REG_SEQUENCE combining old + new
- // - Super: extract subregister from new def
- // Preserve undef/dead flags, never mass-clear on partial defs
-}
\ No newline at end of file
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Find the definition instruction for NewSSA
+ MachineInstr *DefMI = MRI.getVRegDef(NewSSA);
+ if (!DefMI) {
+ LLVM_DEBUG(dbgs() << " No definition found for NewSSA, skipping\n");
+ return;
+ }
+
+ // Get the LiveInterval and VNInfo for the definition
+ LiveInterval &LI = LIS.getInterval(OrigVReg);
+ SlotIndex DefIdx = LIS.getInstructionIndex(*DefMI).getRegSlot();
+ VNInfo *VNI = LI.getVNInfoAt(DefIdx);
+ if (!VNI) {
+ LLVM_DEBUG(dbgs() << " No VNInfo found for definition, skipping\n");
+ return;
+ }
+
+ const TargetRegisterClass *NewRC = MRI.getRegClass(NewSSA);
+
+ LLVM_DEBUG(dbgs() << " Rewriting uses reached by VNI " << VNI->id << " from: ");
+ LLVM_DEBUG(DefMI->print(dbgs()));
+
+ // Iterate through all uses of OrigVReg
+ for (MachineOperand &MO : llvm::make_early_inc_range(MRI.use_operands(OrigVReg))) {
+ MachineInstr *UseMI = MO.getParent();
+
+ // Skip the definition instruction itself
+ if (UseMI == DefMI)
+ continue;
+
+ // Check if this use is reached by our VNI
+ if (!reachedByThisVNI(LI, DefMI, UseMI, MO, VNI))
+ continue;
+
+ // Get the lane mask for this operand
+ LaneBitmask OpMask = operandLaneMask(MO);
+ if ((OpMask & MaskToRewrite).none())
+ continue;
+
+ LLVM_DEBUG(dbgs() << " Processing use with OpMask=" << PrintLaneMask(OpMask) << ": ");
+ LLVM_DEBUG(UseMI->print(dbgs()));
+
+ const TargetRegisterClass *OpRC = MRI.getRegClass(OrigVReg);
+
+ // Case 1: Exact match - direct replacement
+ if (OpMask == MaskToRewrite) {
+ // Check register class compatibility
+ if (TRI.getCommonSubClass(NewRC, OpRC)) {
+ LLVM_DEBUG(dbgs() << " Exact match -> direct replacement\n");
+ MO.setReg(NewSSA);
+ MO.setSubReg(0); // Clear subregister
+ continue;
+ }
+
+ // Incompatible register classes with same lane mask indicates corrupted MIR
+ llvm_unreachable("Incompatible register classes with same lane mask - invalid MIR");
+ }
+
+ // Case 2: Super/Mixed - use needs more lanes than we're rewriting
+ if ((OpMask & ~MaskToRewrite).any()) {
+ LLVM_DEBUG(dbgs() << " Super/Mixed case -> building REG_SEQUENCE\n");
+
+ SmallVector<LaneBitmask, 4> LanesToExtend;
+ SlotIndex RSIdx;
+ Register RSReg = buildRSForSuperUse(UseMI, MO, OrigVReg, NewSSA, MaskToRewrite,
+ LI, OpRC, RSIdx, LanesToExtend);
+ extendAt(LI, RSIdx, LanesToExtend);
+ MO.setReg(RSReg);
+ MO.setSubReg(0);
+
+ } else {
+ // Case 3: Subset - use needs fewer lanes, keep subregister index
+ LLVM_DEBUG(dbgs() << " Subset case -> keeping subregister\n");
+ unsigned SubReg = MO.getSubReg();
+ assert(SubReg && "Subset case should have subregister");
+
+ MO.setReg(NewSSA);
+ // Keep the existing subregister index
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " Completed rewriting dominated uses\n");
+}
+
+//===----------------------------------------------------------------------===//
+// Internal helpers
+//===----------------------------------------------------------------------===//
+
+/// Return the VNInfo reaching this PHI operand along its predecessor edge.
+VNInfo *MachineLaneSSAUpdater::incomingOnEdge(LiveInterval &LI, MachineInstr *Phi,
+ MachineOperand &PhiOp) {
+ unsigned OpIdx = Phi->getOperandNo(&PhiOp);
+ MachineBasicBlock *Pred = Phi->getOperand(OpIdx + 1).getMBB();
+ SlotIndex EndB = LIS.getMBBEndIdx(Pred);
+ return LI.getVNInfoBefore(EndB);
+}
+
+/// True if \p UseMI's operand is reached by \p VNI (PHIs, same-block order,
+/// cross-block dominance).
+bool MachineLaneSSAUpdater::reachedByThisVNI(LiveInterval &LI, MachineInstr *DefMI,
+ MachineInstr *UseMI, MachineOperand &UseOp,
+ VNInfo *VNI) {
+ if (UseMI->isPHI())
+ return incomingOnEdge(LI, UseMI, UseOp) == VNI;
+
+ if (UseMI->getParent() == DefMI->getParent()) {
+ SlotIndex DefIdx = LIS.getInstructionIndex(*DefMI);
+ SlotIndex UseIdx = LIS.getInstructionIndex(*UseMI);
+ return DefIdx < UseIdx; // strict within-block order
+ }
+ return MDT.dominates(DefMI->getParent(), UseMI->getParent());
+}
+
+/// What lanes does this operand read?
+LaneBitmask MachineLaneSSAUpdater::operandLaneMask(const MachineOperand &MO) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (unsigned Sub = MO.getSubReg())
+ return TRI.getSubRegIndexLaneMask(Sub);
+ return MRI.getMaxLaneMaskForVReg(MO.getReg());
+}
+
+/// Build a REG_SEQUENCE to materialize a super-reg/mixed-lane use.
+/// Inserts at the PHI predecessor terminator (for PHI uses) or right before
+/// UseMI otherwise. Returns the new full-width vreg, the RS index via OutIdx,
+/// and the subrange lane masks that should be extended to that point.
+Register MachineLaneSSAUpdater::buildRSForSuperUse(MachineInstr *UseMI, MachineOperand &MO,
+ Register OldVR, Register NewVR,
+ LaneBitmask MaskToRewrite, LiveInterval &LI,
+ const TargetRegisterClass *OpRC,
+ SlotIndex &OutIdx,
+ SmallVectorImpl<LaneBitmask> &LanesToExtend) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ MachineBasicBlock *InsertBB = UseMI->getParent();
+ MachineBasicBlock::iterator IP(UseMI);
+ SlotIndex QueryIdx;
+
+ if (UseMI->isPHI()) {
+ unsigned OpIdx = UseMI->getOperandNo(&MO);
+ MachineBasicBlock *Pred = UseMI->getOperand(OpIdx + 1).getMBB();
+ InsertBB = Pred;
+ IP = Pred->getFirstTerminator(); // ok if == end()
+ QueryIdx = LIS.getMBBEndIdx(Pred).getPrevSlot();
+ } else {
+ QueryIdx = LIS.getInstructionIndex(*UseMI);
+ }
+
+ Register Dest = MRI.createVirtualRegister(OpRC);
+ auto RS = BuildMI(*InsertBB, IP,
+ (IP != InsertBB->end() ? IP->getDebugLoc() : DebugLoc()),
+ TII.get(TargetOpcode::REG_SEQUENCE), Dest);
+
+ SmallDenseSet<unsigned, 8> AddedSubIdxs;
+ SmallDenseSet<LaneBitmask::Type, 8> AddedMasks;
+
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (!SR.getVNInfoAt(QueryIdx))
+ continue;
+ LaneBitmask Lane = SR.LaneMask;
+ if (!AddedMasks.insert(Lane.getAsInteger()).second)
+ continue;
+
+ unsigned SubIdx = getSubRegIndexForLaneMask(Lane, &TRI);
+ if (!SubIdx || !AddedSubIdxs.insert(SubIdx).second)
+ continue;
+
+ if (Lane == MaskToRewrite)
+ RS.addReg(NewVR).addImm(SubIdx);
+ else
+ RS.addReg(OldVR, 0, SubIdx).addImm(SubIdx);
+
+ LanesToExtend.push_back(Lane);
+ }
+
+ // Fallback: ensure at least the rewritten lane appears.
+ if (AddedSubIdxs.empty()) {
+ unsigned SubIdx = getSubRegIndexForLaneMask(MaskToRewrite, &TRI);
+ RS.addReg(NewVR).addImm(SubIdx);
+ LanesToExtend.push_back(MaskToRewrite);
+ }
+
+ LIS.InsertMachineInstrInMaps(*RS);
+ OutIdx = LIS.getInstructionIndex(*RS);
+
+ LLVM_DEBUG(dbgs() << " Built REG_SEQUENCE: ");
+ LLVM_DEBUG(RS->print(dbgs()));
+
+ return Dest;
+}
+
+/// Extend LI (and only the specified subranges) at Idx.
+void MachineLaneSSAUpdater::extendAt(LiveInterval &LI, SlotIndex Idx,
+ ArrayRef<LaneBitmask> Lanes) {
+ SmallVector<SlotIndex, 1> P{Idx};
+ LIS.extendToIndices(LI, P);
+ for (auto &SR : LI.subranges())
+ for (LaneBitmask L : Lanes)
+ if (SR.LaneMask == L)
+ LIS.extendToIndices(SR, P);
+}
+
+// Remove the old helper that's no longer needed
+// LaneBitmask MachineLaneSSAUpdater::getLaneMaskForOperand(...) - REMOVED
\ No newline at end of file
>From d37f704b7d3aac4711a1d394bd2e0948516995b8 Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Wed, 8 Oct 2025 20:19:50 +0000
Subject: [PATCH 3/7] [CodeGen] MachineLaneSSAUpdater bug fixes, refactoring,
and unit tests. WIP.
---
.../llvm/CodeGen/MachineLaneSSAUpdater.h | 34 +-
llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp | 392 +++++---
llvm/unittests/CodeGen/CMakeLists.txt | 1 +
.../CodeGen/MachineLaneSSAUpdaterTest.cpp | 885 ++++++++++++++++++
4 files changed, 1183 insertions(+), 129 deletions(-)
create mode 100644 llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
diff --git a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
index b606124d4167e..2c49c63f6b3ff 100644
--- a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
@@ -105,8 +105,15 @@ class SpillCutCollector {
//===----------------------------------------------------------------------===//
// MachineLaneSSAUpdater: universal SSA repair for Machine IR (lane-aware)
-// * addDefAndRepairNewDef : for plain new defs (no prior pruneValue)
-// * addDefAndRepairAfterSpill: for reloads (must consume CutEndPoints)
+//
+// Use Case 1 (Common): repairSSAForNewDef()
+// - Caller creates a new instruction that defines an existing vreg (violating SSA)
+// - This function creates a new vreg, replaces the operand, and repairs SSA
+// - Example: User inserts "OrigVReg = ADD ..." and calls repairSSAForNewDef()
+//
+// Use Case 2 (Spill/Reload): addDefAndRepairAfterSpill()
+// - Spiller has already created both instruction and new vreg
+// - Must consume CutEndPoints from spill-time
//===----------------------------------------------------------------------===//
class MachineLaneSSAUpdater {
public:
@@ -116,11 +123,20 @@ class MachineLaneSSAUpdater {
const TargetRegisterInfo &TRI)
: MF(MF), LIS(LIS), MDT(MDT), TRI(TRI) {}
- // Plain new-def path (no EndPoints required). The updater derives any
- // necessary data from the intact LIS.
- Register addDefAndRepairNewDef(MachineInstr &NewDefMI,
- Register OrigVReg,
- LaneBitmask DefMask);
+ // Use Case 1 (Common): Repair SSA for a new definition
+ //
+ // NewDefMI: Instruction with a def operand that currently defines OrigVReg (violating SSA)
+ // OrigVReg: The virtual register being redefined
+ //
+ // This function will:
+ // 1. Find the def operand in NewDefMI that defines OrigVReg
+ // 2. Derive the lane mask from the operand's subreg index (if any)
+ // 3. Create a new virtual register with appropriate register class
+ // 4. Replace the operand in NewDefMI to define the new vreg
+ // 5. Perform SSA repair (insert PHIs, rewrite uses)
+ //
+ // Returns: The newly created virtual register
+ Register repairSSAForNewDef(MachineInstr &NewDefMI, Register OrigVReg);
// Reload-after-spill path (requires spill-time EndPoints). Will assert
// if the token does not match the OrigVReg or if indices are inconsistent.
@@ -180,14 +196,14 @@ class MachineLaneSSAUpdater {
// Internal helper methods for use rewriting
VNInfo *incomingOnEdge(LiveInterval &LI, MachineInstr *Phi, MachineOperand &PhiOp);
- bool reachedByThisVNI(LiveInterval &LI, MachineInstr *DefMI, MachineInstr *UseMI,
- MachineOperand &UseOp, VNInfo *VNI);
+ bool defReachesUse(MachineInstr *DefMI, MachineInstr *UseMI, MachineOperand &UseOp);
LaneBitmask operandLaneMask(const MachineOperand &MO);
Register buildRSForSuperUse(MachineInstr *UseMI, MachineOperand &MO,
Register OldVR, Register NewVR, LaneBitmask MaskToRewrite,
LiveInterval &LI, const TargetRegisterClass *OpRC,
SlotIndex &OutIdx, SmallVectorImpl<LaneBitmask> &LanesToExtend);
void extendAt(LiveInterval &LI, SlotIndex Idx, ArrayRef<LaneBitmask> Lanes);
+ void updateDeadFlags(Register Reg);
// --- Data members ---
MachineFunction &MF;
diff --git a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
index 4f0e0b5bfca27..9bcb4e52fe44a 100644
--- a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
@@ -11,7 +11,11 @@
// spill scenarios with full subregister lane awareness.
//
// Key features:
-// - Two explicit entry points: addDefAndRepairNewDef and addDefAndRepairAfterSpill
+// - Two explicit entry points:
+// * repairSSAForNewDef - Common use case: caller creates instruction defining
+// existing vreg (violating SSA), updater creates new vreg and repairs
+// * addDefAndRepairAfterSpill - Spill/reload use case: caller creates instruction
+// with new vreg, updater repairs SSA using spill-time EndPoints
// - Lane-aware PHI insertion with per-edge masks
// - Pruned IDF computation (NewDefBlocks ∩ LiveIn(OldVR))
// - Precise LiveInterval extension using captured EndPoints
@@ -158,30 +162,101 @@ CutEndPoints SpillCutCollector::cut(Register OrigVReg, SlotIndex CutIdx,
// MachineLaneSSAUpdater Implementation
//===----------------------------------------------------------------------===//
-Register MachineLaneSSAUpdater::addDefAndRepairNewDef(MachineInstr &NewDefMI,
- Register OrigVReg,
- LaneBitmask DefMask) {
- LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::addDefAndRepairNewDef VReg=" << OrigVReg
- << " DefMask=" << PrintLaneMask(DefMask) << "\n");
+Register MachineLaneSSAUpdater::repairSSAForNewDef(MachineInstr &NewDefMI,
+ Register OrigVReg) {
+ LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::repairSSAForNewDef VReg=" << OrigVReg << "\n");
- // Step 1: Index the new instruction in SlotIndexes/LIS
- indexNewInstr(NewDefMI);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
- // Step 2: Extract the new SSA register from the definition instruction
- Register NewSSAVReg = NewDefMI.defs().begin()->getReg();
- assert(NewSSAVReg.isValid() && NewSSAVReg.isVirtual() &&
- "NewDefMI should define a valid virtual register");
+ // Step 1: Find the def operand that currently defines OrigVReg (violating SSA)
+ MachineOperand *DefOp = nullptr;
+ unsigned DefOpIdx = 0;
+ for (MachineOperand &MO : NewDefMI.defs()) {
+ if (MO.getReg() == OrigVReg) {
+ DefOp = &MO;
+ break;
+ }
+ ++DefOpIdx;
+ }
- // Step 3: Derive necessary data from intact LiveIntervals
- // The LiveInterval should already exist and be properly computed
- if (!LIS.hasInterval(NewSSAVReg)) {
- LIS.createAndComputeVirtRegInterval(NewSSAVReg);
+ assert(DefOp && "NewDefMI should have a def operand for OrigVReg");
+ assert(DefOp->isDef() && "Found operand should be a definition");
+
+ // Step 2: Derive DefMask from the operand's subreg index (if any)
+ unsigned SubRegIdx = DefOp->getSubReg();
+ LaneBitmask DefMask;
+
+ if (SubRegIdx) {
+ // Partial register definition - get lane mask for this subreg
+ DefMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ LLVM_DEBUG(dbgs() << " Partial def with subreg " << TRI.getSubRegIndexName(SubRegIdx)
+ << ", DefMask=" << PrintLaneMask(DefMask) << "\n");
+ } else {
+ // Full register definition - get all lanes for this register class
+ DefMask = MRI.getMaxLaneMaskForVReg(OrigVReg);
+ LLVM_DEBUG(dbgs() << " Full register def, DefMask=" << PrintLaneMask(DefMask) << "\n");
}
- // Step 4: Perform common SSA repair (PHI placement + use rewriting)
+ // Step 3: Create a new virtual register with appropriate register class
+ // If this is a subreg def, we need the class for the subreg, not the full reg
+ const TargetRegisterClass *RC;
+ if (SubRegIdx) {
+ // For subreg defs, get the subreg class
+ const TargetRegisterClass *OrigRC = MRI.getRegClass(OrigVReg);
+ RC = TRI.getSubRegisterClass(OrigRC, SubRegIdx);
+ assert(RC && "Failed to get subregister class for subreg def - would create incorrect MIR");
+ } else {
+ // For full reg defs, use the same class as OrigVReg
+ RC = MRI.getRegClass(OrigVReg);
+ }
+
+ Register NewSSAVReg = MRI.createVirtualRegister(RC);
+ LLVM_DEBUG(dbgs() << " Created new SSA vreg " << NewSSAVReg << " with RC=" << TRI.getRegClassName(RC) << "\n");
+
+ // Step 4: Replace the operand in NewDefMI to define the new vreg
+ // If this was a subreg def, the new vreg is a full register of the subreg class
+ // so we clear the subreg index (e.g., %1.sub0:vreg_64 becomes %3:vgpr_32)
+ DefOp->setReg(NewSSAVReg);
+ if (SubRegIdx) {
+ DefOp->setSubReg(0);
+ LLVM_DEBUG(dbgs() << " Replaced operand: " << OrigVReg << "." << TRI.getSubRegIndexName(SubRegIdx)
+ << " -> " << NewSSAVReg << " (full register)\n");
+ } else {
+ LLVM_DEBUG(dbgs() << " Replaced operand: " << OrigVReg << " -> " << NewSSAVReg << "\n");
+ }
+
+ // Step 5: Index the new instruction in SlotIndexes/LIS
+ indexNewInstr(NewDefMI);
+
+ // Step 6: Perform common SSA repair (PHI placement + use rewriting)
+ // LiveInterval for NewSSAVReg will be created by getInterval() as needed
performSSARepair(NewSSAVReg, OrigVReg, DefMask, NewDefMI.getParent());
- LLVM_DEBUG(dbgs() << " New def SSA repair complete, returning " << NewSSAVReg << "\n");
+ // Step 7: If SSA repair created subregister uses of OrigVReg (e.g., in PHIs or REG_SEQUENCEs),
+ // recompute its LiveInterval to create subranges
+ LaneBitmask AllLanes = MRI.getMaxLaneMaskForVReg(OrigVReg);
+ if (DefMask != AllLanes) {
+ LiveInterval &OrigLI = LIS.getInterval(OrigVReg);
+ if (!OrigLI.hasSubRanges()) {
+ // Check if any uses now access OrigVReg with subregister indices
+ bool HasSubregUses = false;
+ for (const MachineOperand &MO : MRI.use_operands(OrigVReg)) {
+ if (MO.getSubReg() != 0) {
+ HasSubregUses = true;
+ break;
+ }
+ }
+
+ if (HasSubregUses) {
+ LLVM_DEBUG(dbgs() << " Recomputing LiveInterval for " << OrigVReg
+ << " after SSA repair created subregister uses\n");
+ LIS.removeInterval(OrigVReg);
+ LIS.createAndComputeVirtRegInterval(OrigVReg);
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " repairSSAForNewDef complete, returning " << NewSSAVReg << "\n");
return NewSSAVReg;
}
@@ -264,6 +339,7 @@ void MachineLaneSSAUpdater::performSSARepair(Register NewVReg, Register OrigVReg
SmallVector<Register> AllPHIVRegs = insertLaneAwarePHI(NewVReg, OrigVReg, DefMask, DefBB);
// Step 2: Rewrite dominated uses once for each new register
+ // Note: getInterval() will automatically create LiveIntervals if needed
rewriteDominatedUses(OrigVReg, NewVReg, DefMask);
for (Register PHIVReg : AllPHIVRegs) {
rewriteDominatedUses(OrigVReg, PHIVReg, DefMask);
@@ -275,17 +351,26 @@ void MachineLaneSSAUpdater::performSSARepair(Register NewVReg, Register OrigVReg
// Also renumber PHI intervals
for (Register PHIVReg : AllPHIVRegs) {
- if (LIS.hasInterval(PHIVReg)) {
- LiveInterval &PHILI = LIS.getInterval(PHIVReg);
- PHILI.RenumberValues();
- }
+ LiveInterval &PHILI = LIS.getInterval(PHIVReg);
+ PHILI.RenumberValues();
}
// Also renumber original interval if it was modified
LiveInterval &OrigLI = LIS.getInterval(OrigVReg);
OrigLI.RenumberValues();
- // Step 4: Verification if enabled
+ // Recompute OrigVReg's LiveInterval after rewriting uses
+ // Some uses may have been rewritten to NewVReg or PHI registers,
+ // so OrigVReg's live range may need to shrink
+ LIS.shrinkToUses(&OrigLI);
+
+ // Step 4: Update operand flags to match the LiveIntervals
+ updateDeadFlags(NewVReg);
+ for (Register PHIVReg : AllPHIVRegs) {
+ updateDeadFlags(PHIVReg);
+ }
+
+ // Step 5: Verification if enabled
if (VerifyOnExit) {
LLVM_DEBUG(dbgs() << " Verifying after SSA repair...\n");
// TODO: Add verification calls
@@ -479,60 +564,44 @@ SmallVector<Register> MachineLaneSSAUpdater::insertLaneAwarePHI(Register Initial
LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::insertLaneAwarePHI InitialVReg=" << InitialVReg
<< " OrigVReg=" << OrigVReg << " DefMask=" << PrintLaneMask(DefMask) << "\n");
- // Worklist item: (VReg, DefBB) pairs that need PHI placement
- struct WorkItem {
- Register VReg;
- MachineBasicBlock *DefBB;
- WorkItem(Register V, MachineBasicBlock *BB) : VReg(V), DefBB(BB) {}
- };
-
SmallVector<Register> AllCreatedPHIs;
- SmallVector<WorkItem> Worklist;
- DenseSet<MachineBasicBlock *> ProcessedBlocks; // Avoid duplicate PHIs in same block
- // Seed worklist with initial definition
- Worklist.emplace_back(InitialVReg, InitialDefBB);
+ // Step 1: Compute IDF (Iterated Dominance Frontier) for the initial definition
+ // This gives us ALL blocks where PHI nodes need to be inserted
+ SmallVector<MachineBasicBlock *> DefBlocks = {InitialDefBB};
+ SmallVector<MachineBasicBlock *> IDFBlocks;
+ computePrunedIDF(OrigVReg, DefMask, DefBlocks, IDFBlocks);
- LLVM_DEBUG(dbgs() << " Starting worklist processing...\n");
+ LLVM_DEBUG(dbgs() << " Computed IDF: found " << IDFBlocks.size() << " blocks needing PHIs\n");
+ for (MachineBasicBlock *MBB : IDFBlocks) {
+ LLVM_DEBUG(dbgs() << " BB#" << MBB->getNumber() << "\n");
+ }
- while (!Worklist.empty()) {
- WorkItem Item = Worklist.pop_back_val();
-
- LLVM_DEBUG(dbgs() << " Processing VReg=" << Item.VReg
- << " DefBB=#" << Item.DefBB->getNumber() << "\n");
-
- // Step 1: Compute pruned IDF for this definition
- SmallVector<MachineBasicBlock *> DefBlocks = {Item.DefBB};
- SmallVector<MachineBasicBlock *> IDFBlocks;
- computePrunedIDF(OrigVReg, DefMask, DefBlocks, IDFBlocks);
+ // Step 2: Iterate through IDF blocks sequentially, creating PHIs
+ // Key insight: After creating a PHI, update NewVReg to the PHI result
+ // so subsequent PHIs use the correct register
+ Register CurrentNewVReg = InitialVReg;
+
+ for (MachineBasicBlock *JoinMBB : IDFBlocks) {
+ LLVM_DEBUG(dbgs() << " Creating PHI in BB#" << JoinMBB->getNumber()
+ << " with CurrentNewVReg=" << CurrentNewVReg << "\n");
- LLVM_DEBUG(dbgs() << " Found " << IDFBlocks.size() << " IDF blocks\n");
+ // Create PHI: merges OrigVReg and CurrentNewVReg based on dominance
+ Register PHIResult = createPHIInBlock(*JoinMBB, OrigVReg, CurrentNewVReg);
- // Step 2: Create PHIs in each IDF block
- for (MachineBasicBlock *JoinMBB : IDFBlocks) {
- // Skip if we already processed this join block (avoid duplicate PHIs)
- if (ProcessedBlocks.contains(JoinMBB)) {
- LLVM_DEBUG(dbgs() << " Skipping already processed BB#" << JoinMBB->getNumber() << "\n");
- continue;
- }
- ProcessedBlocks.insert(JoinMBB);
+ if (PHIResult.isValid()) {
+ AllCreatedPHIs.push_back(PHIResult);
- LLVM_DEBUG(dbgs() << " Creating PHI in BB#" << JoinMBB->getNumber() << "\n");
+ // Update CurrentNewVReg to be the PHI result
+ // This ensures the next PHI (if any) uses this PHI's result, not the original InitialVReg
+ CurrentNewVReg = PHIResult;
- // Create PHI using the original per-edge analysis logic
- Register PHIResult = createPHIInBlock(*JoinMBB, OrigVReg, Item.VReg);
-
- // Add PHI result to worklist for further processing and to result collection
- if (PHIResult.isValid()) {
- Worklist.emplace_back(PHIResult, JoinMBB);
- AllCreatedPHIs.push_back(PHIResult);
- LLVM_DEBUG(dbgs() << " Created PHI result VReg=" << PHIResult
- << ", added to worklist\n");
- }
+ LLVM_DEBUG(dbgs() << " Created PHI result VReg=" << PHIResult
+ << ", will use this for subsequent PHIs\n");
}
}
- LLVM_DEBUG(dbgs() << " Worklist processing complete. Created "
+ LLVM_DEBUG(dbgs() << " PHI insertion complete. Created "
<< AllCreatedPHIs.size() << " PHI registers total.\n");
return AllCreatedPHIs;
@@ -554,16 +623,21 @@ Register MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
// Collect PHI operands for the specific reload lanes
SmallVector<MachineOperand> PHIOperands;
- LiveInterval &NewLI = LIS.getInterval(NewVReg);
LLVM_DEBUG(dbgs() << " Creating PHI for " << (IsPartialReload ? "partial reload" : "full reload")
<< " ReloadMask=" << PrintLaneMask(ReloadMask) << "\n");
+ // Get the definition block of NewVReg for dominance checks
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineInstr *NewDefMI = MRI.getVRegDef(NewVReg);
+ MachineBasicBlock *NewDefBB = NewDefMI->getParent();
+
for (MachineBasicBlock *PredMBB : JoinMBB.predecessors()) {
- // Check if NewVReg (reloaded register) is live-out from this predecessor
- bool NewVRegLive = LIS.isLiveOutOfMBB(NewLI, PredMBB);
+ // Use dominance check instead of liveness: if NewDefBB dominates PredMBB,
+ // then NewVReg is available at the end of PredMBB
+ bool UseNewReg = MDT.dominates(NewDefBB, PredMBB);
- if (NewVRegLive) {
+ if (UseNewReg) {
// This is the reload path - use NewVReg (always full register for its class)
LLVM_DEBUG(dbgs() << " Pred BB#" << PredMBB->getNumber()
<< " contributes NewVReg (reload path)\n");
@@ -604,7 +678,6 @@ Register MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
MachineInstr *PHI = PHINode.getInstr();
LIS.InsertMachineInstrInMaps(*PHI);
- LIS.createAndComputeVirtRegInterval(PHIVReg);
LLVM_DEBUG(dbgs() << " Created lane-specific PHI: ");
LLVM_DEBUG(PHI->print(dbgs()));
@@ -630,20 +703,15 @@ void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
return;
}
- // Get the LiveInterval and VNInfo for the definition
- LiveInterval &LI = LIS.getInterval(OrigVReg);
- SlotIndex DefIdx = LIS.getInstructionIndex(*DefMI).getRegSlot();
- VNInfo *VNI = LI.getVNInfoAt(DefIdx);
- if (!VNI) {
- LLVM_DEBUG(dbgs() << " No VNInfo found for definition, skipping\n");
- return;
- }
-
+ MachineBasicBlock *DefBB = DefMI->getParent();
const TargetRegisterClass *NewRC = MRI.getRegClass(NewSSA);
- LLVM_DEBUG(dbgs() << " Rewriting uses reached by VNI " << VNI->id << " from: ");
+ LLVM_DEBUG(dbgs() << " Rewriting uses dominated by definition in BB#" << DefBB->getNumber() << ": ");
LLVM_DEBUG(DefMI->print(dbgs()));
+ // Get OrigVReg's LiveInterval for reference
+ LiveInterval &OrigLI = LIS.getInterval(OrigVReg);
+
// Iterate through all uses of OrigVReg
for (MachineOperand &MO : llvm::make_early_inc_range(MRI.use_operands(OrigVReg))) {
MachineInstr *UseMI = MO.getParent();
@@ -652,8 +720,8 @@ void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
if (UseMI == DefMI)
continue;
- // Check if this use is reached by our VNI
- if (!reachedByThisVNI(LI, DefMI, UseMI, MO, VNI))
+ // Check if this use is reached by our definition
+ if (!defReachesUse(DefMI, UseMI, MO))
continue;
// Get the lane mask for this operand
@@ -669,10 +737,23 @@ void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
// Case 1: Exact match - direct replacement
if (OpMask == MaskToRewrite) {
// Check register class compatibility
- if (TRI.getCommonSubClass(NewRC, OpRC)) {
+ // If operand uses a subreg, NewRC should match the subreg class
+ // If operand uses full register, NewRC should match OpRC
+ const TargetRegisterClass *ExpectedRC = MO.getSubReg() != 0
+ ? TRI.getSubRegisterClass(OpRC, MO.getSubReg())
+ : OpRC;
+ bool Compatible = (ExpectedRC == NewRC);
+
+ if (Compatible) {
LLVM_DEBUG(dbgs() << " Exact match -> direct replacement\n");
MO.setReg(NewSSA);
- MO.setSubReg(0); // Clear subregister
+ MO.setSubReg(0); // Clear subregister (NewSSA is a full register of NewRC)
+
+ // Extend NewSSA's live interval to cover this use
+ SlotIndex UseIdx = LIS.getInstructionIndex(*UseMI).getRegSlot();
+ LiveInterval &NewLI = LIS.getInterval(NewSSA);
+ LIS.extendToIndices(NewLI, {UseIdx});
+
continue;
}
@@ -687,11 +768,27 @@ void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
SmallVector<LaneBitmask, 4> LanesToExtend;
SlotIndex RSIdx;
Register RSReg = buildRSForSuperUse(UseMI, MO, OrigVReg, NewSSA, MaskToRewrite,
- LI, OpRC, RSIdx, LanesToExtend);
- extendAt(LI, RSIdx, LanesToExtend);
+ OrigLI, OpRC, RSIdx, LanesToExtend);
+ extendAt(OrigLI, RSIdx, LanesToExtend);
MO.setReg(RSReg);
MO.setSubReg(0);
+ // Extend RSReg's live interval to cover this use
+ SlotIndex UseIdx;
+ if (UseMI->isPHI()) {
+ // For PHI, the value must be live at the end of the predecessor block
+ unsigned OpIdx = UseMI->getOperandNo(&MO);
+ MachineBasicBlock *Pred = UseMI->getOperand(OpIdx + 1).getMBB();
+ UseIdx = LIS.getMBBEndIdx(Pred);
+ } else {
+ UseIdx = LIS.getInstructionIndex(*UseMI).getRegSlot();
+ }
+ LiveInterval &RSLI = LIS.getInterval(RSReg);
+ LIS.extendToIndices(RSLI, {UseIdx});
+
+ // Update dead flag on REG_SEQUENCE result
+ updateDeadFlags(RSReg);
+
} else {
// Case 3: Subset - use needs fewer lanes, keep subregister index
LLVM_DEBUG(dbgs() << " Subset case -> keeping subregister\n");
@@ -700,6 +797,11 @@ void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
MO.setReg(NewSSA);
// Keep the existing subregister index
+
+ // Extend NewSSA's live interval to cover this use
+ SlotIndex UseIdx = LIS.getInstructionIndex(*UseMI).getRegSlot();
+ LiveInterval &NewLI = LIS.getInterval(NewSSA);
+ LIS.extendToIndices(NewLI, {UseIdx});
}
}
@@ -719,19 +821,33 @@ VNInfo *MachineLaneSSAUpdater::incomingOnEdge(LiveInterval &LI, MachineInstr *Ph
return LI.getVNInfoBefore(EndB);
}
-/// True if \p UseMI's operand is reached by \p VNI (PHIs, same-block order,
-/// cross-block dominance).
-bool MachineLaneSSAUpdater::reachedByThisVNI(LiveInterval &LI, MachineInstr *DefMI,
- MachineInstr *UseMI, MachineOperand &UseOp,
- VNInfo *VNI) {
- if (UseMI->isPHI())
- return incomingOnEdge(LI, UseMI, UseOp) == VNI;
+/// Check if \p DefMI's definition reaches \p UseMI's use operand.
+/// During SSA reconstruction, LiveIntervals may not be complete yet, so we use
+/// dominance-based checking rather than querying LiveInterval reachability.
+///
+/// TODO: This dominance-based approach doesn't handle back edges correctly.
+/// For loop back edges, the definition in the loop body doesn't dominate the
+/// loop header PHI's predecessor, but the value does reach the PHI operand.
+/// We need proper reachability analysis (e.g., checking if there's a path from
+/// DefMI to the predecessor block) to handle loops correctly.
+bool MachineLaneSSAUpdater::defReachesUse(MachineInstr *DefMI,
+ MachineInstr *UseMI,
+ MachineOperand &UseOp) {
+ // For PHI uses, check if DefMI dominates the predecessor block
+ if (UseMI->isPHI()) {
+ unsigned OpIdx = UseMI->getOperandNo(&UseOp);
+ MachineBasicBlock *Pred = UseMI->getOperand(OpIdx + 1).getMBB();
+ return MDT.dominates(DefMI->getParent(), Pred);
+ }
+ // For same-block uses, check instruction order
if (UseMI->getParent() == DefMI->getParent()) {
SlotIndex DefIdx = LIS.getInstructionIndex(*DefMI);
SlotIndex UseIdx = LIS.getInstructionIndex(*UseMI);
- return DefIdx < UseIdx; // strict within-block order
+ return DefIdx < UseIdx;
}
+
+ // For cross-block uses, check block dominance
return MDT.dominates(DefMI->getParent(), UseMI->getParent());
}
@@ -778,37 +894,55 @@ Register MachineLaneSSAUpdater::buildRSForSuperUse(MachineInstr *UseMI, MachineO
(IP != InsertBB->end() ? IP->getDebugLoc() : DebugLoc()),
TII.get(TargetOpcode::REG_SEQUENCE), Dest);
+ // Determine what lanes the use needs
+ LaneBitmask UseMask = operandLaneMask(MO);
+
+ // Decompose into lanes from NewVR (updated) and lanes from OldVR (unchanged)
+ LaneBitmask LanesFromNew = UseMask & MaskToRewrite;
+ LaneBitmask LanesFromOld = UseMask & ~MaskToRewrite;
+
+ LLVM_DEBUG(dbgs() << " Building REG_SEQUENCE: UseMask=" << PrintLaneMask(UseMask)
+ << " LanesFromNew=" << PrintLaneMask(LanesFromNew)
+ << " LanesFromOld=" << PrintLaneMask(LanesFromOld) << "\n");
+
SmallDenseSet<unsigned, 8> AddedSubIdxs;
- SmallDenseSet<LaneBitmask::Type, 8> AddedMasks;
-
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if (!SR.getVNInfoAt(QueryIdx))
- continue;
- LaneBitmask Lane = SR.LaneMask;
- if (!AddedMasks.insert(Lane.getAsInteger()).second)
- continue;
-
- unsigned SubIdx = getSubRegIndexForLaneMask(Lane, &TRI);
- if (!SubIdx || !AddedSubIdxs.insert(SubIdx).second)
- continue;
-
- if (Lane == MaskToRewrite)
- RS.addReg(NewVR).addImm(SubIdx);
- else
- RS.addReg(OldVR, 0, SubIdx).addImm(SubIdx);
-
- LanesToExtend.push_back(Lane);
+
+ // Add source for lanes from NewVR (updated lanes)
+ if (LanesFromNew.any()) {
+ unsigned SubIdx = getSubRegIndexForLaneMask(LanesFromNew, &TRI);
+ assert(SubIdx && "Failed to find subregister index for LanesFromNew");
+ RS.addReg(NewVR, 0, 0).addImm(SubIdx); // NewVR is full register, no subreg
+ AddedSubIdxs.insert(SubIdx);
+ LanesToExtend.push_back(LanesFromNew);
}
-
- // Fallback: ensure at least the rewritten lane appears.
- if (AddedSubIdxs.empty()) {
- unsigned SubIdx = getSubRegIndexForLaneMask(MaskToRewrite, &TRI);
- RS.addReg(NewVR).addImm(SubIdx);
- LanesToExtend.push_back(MaskToRewrite);
+
+ // Add source for lanes from OldVR (unchanged lanes)
+ if (LanesFromOld.any()) {
+ unsigned SubIdx = getSubRegIndexForLaneMask(LanesFromOld, &TRI);
+ assert(SubIdx && "Failed to find subregister index for LanesFromOld");
+ RS.addReg(OldVR, 0, SubIdx).addImm(SubIdx); // OldVR.subIdx
+ AddedSubIdxs.insert(SubIdx);
+ LanesToExtend.push_back(LanesFromOld);
}
+
+ assert(!AddedSubIdxs.empty() && "REG_SEQUENCE must have at least one source");
LIS.InsertMachineInstrInMaps(*RS);
OutIdx = LIS.getInstructionIndex(*RS);
+
+ // Create live interval for the REG_SEQUENCE result
+ LIS.createAndComputeVirtRegInterval(Dest);
+
+ // Extend live intervals of all source registers to cover this REG_SEQUENCE
+ // Use the register slot to ensure the live range covers the use
+ SlotIndex UseSlot = OutIdx.getRegSlot();
+ for (MachineOperand &MO : RS.getInstr()->uses()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ Register SrcReg = MO.getReg();
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ LIS.extendToIndices(SrcLI, {UseSlot});
+ }
+ }
LLVM_DEBUG(dbgs() << " Built REG_SEQUENCE: ");
LLVM_DEBUG(RS->print(dbgs()));
@@ -827,5 +961,23 @@ void MachineLaneSSAUpdater::extendAt(LiveInterval &LI, SlotIndex Idx,
LIS.extendToIndices(SR, P);
}
+/// Clear stale 'dead' flags on \p Reg's defining instruction.
+///
+/// SSA repair can add uses to a register whose def operand was previously
+/// marked dead. A flag is cleared only when the live interval is non-empty
+/// and at least one non-debug use exists. Note this only *clears* flags; it
+/// never sets a dead flag on a def that lost all of its uses.
+void MachineLaneSSAUpdater::updateDeadFlags(Register Reg) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ LiveInterval &LI = LIS.getInterval(Reg);
+ MachineInstr *DefMI = MRI.getVRegDef(Reg);
+ // In SSA form there is a single defining instruction; bail out if none.
+ if (!DefMI)
+ return;
+
+ for (MachineOperand &MO : DefMI->defs()) {
+ if (MO.getReg() == Reg && MO.isDead()) {
+ // Check if this register is actually live (has uses)
+ if (!LI.empty() && !MRI.use_nodbg_empty(Reg)) {
+ MO.setIsDead(false);
+ LLVM_DEBUG(dbgs() << " Cleared dead flag on " << Reg << "\n");
+ }
+ }
+ }
+}
+
// Remove the old helper that's no longer needed
// LaneBitmask MachineLaneSSAUpdater::getLaneMaskForOperand(...) - REMOVED
\ No newline at end of file
diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt
index 22dbdaa4fa82e..11b031d8f4c58 100644
--- a/llvm/unittests/CodeGen/CMakeLists.txt
+++ b/llvm/unittests/CodeGen/CMakeLists.txt
@@ -36,6 +36,7 @@ add_llvm_unittest(CodeGenTests
MachineDomTreeUpdaterTest.cpp
MachineInstrBundleIteratorTest.cpp
MachineInstrTest.cpp
+ MachineLaneSSAUpdaterTest.cpp
MachineOperandTest.cpp
RegAllocScoreTest.cpp
PassManagerTest.cpp
diff --git a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
new file mode 100644
index 0000000000000..1ac94c14aa31e
--- /dev/null
+++ b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
@@ -0,0 +1,885 @@
+//===- MachineLaneSSAUpdaterTest.cpp - Unit tests for MachineLaneSSAUpdater -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLaneSSAUpdater.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+// TestPass needs to be defined outside anonymous namespace for INITIALIZE_PASS
+// Minimal MachineFunctionPass used only as the registration anchor for
+// INITIALIZE_PASS; the actual test behavior lives in the TestPassT<...>
+// subclass defined later in this file.
+struct TestPass : public MachineFunctionPass {
+ // Pass identification token required by the legacy pass manager.
+ static char ID;
+ TestPass() : MachineFunctionPass(ID) {}
+};
+
+char TestPass::ID = 0;
+
+namespace llvm {
+ void initializeTestPassPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(TestPass, "testpass", "testpass", false, false)
+
+namespace {
+
+// Register all built-in targets plus the core and codegen passes so that MIR
+// parsing, analyses, and machine verification are available to the tests.
+// Safe to call from every test; the underlying registrations are idempotent.
+void initLLVM() {
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+
+ PassRegistry *Registry = PassRegistry::getPassRegistry();
+ initializeCore(*Registry);
+ initializeCodeGen(*Registry);
+}
+
+// Helper to create a target machine for AMDGPU (gfx900).
+// Returns nullptr when the AMDGPU backend is not compiled into this build;
+// callers use that to skip the test rather than fail it.
+std::unique_ptr<TargetMachine> createTargetMachine() {
+ Triple TT("amdgcn--");
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget("", TT, Error);
+ if (!T)
+ return nullptr;
+
+ TargetOptions Options;
+ return std::unique_ptr<TargetMachine>(
+ T->createTargetMachine(TT, "gfx900", "", Options, std::nullopt,
+ std::nullopt, CodeGenOptLevel::Aggressive));
+}
+
+// Helper to parse MIR string with legacy PassManager.
+// On success, the MachineModuleInfoWrapperPass holding the parsed machine
+// functions is added to PM (which takes ownership), and the returned Module
+// must be kept alive by the caller for the duration of the PM run.
+// Returns nullptr on any parse failure.
+// NOTE(review): if parseMachineFunctions fails, MMIWP was never added to PM
+// and leaks — tolerable in a unit test, but worth confirming upstream.
+std::unique_ptr<Module> parseMIR(LLVMContext &Context,
+ legacy::PassManagerBase &PM,
+ std::unique_ptr<MIRParser> &MIR,
+ const TargetMachine &TM, StringRef MIRCode) {
+ SMDiagnostic Diagnostic;
+ std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+ MIR = createMIRParser(std::move(MBuffer), Context);
+ if (!MIR)
+ return nullptr;
+
+ std::unique_ptr<Module> M = MIR->parseIRModule();
+ if (!M)
+ return nullptr;
+
+ M->setDataLayout(TM.createDataLayout());
+
+ MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(&TM);
+ if (MIR->parseMachineFunctions(*M, MMIWP->getMMI()))
+ return nullptr;
+ PM.add(MMIWP);
+
+ return M;
+}
+
+// Generic test pass: requires the analysis named by AnalysisType, runs the
+// user-supplied callback T on the MachineFunction, then runs the machine
+// verifier and checks that its pass/fail result matches ShouldPass.
+template <typename AnalysisType>
+struct TestPassT : public TestPass {
+ // Callback signature: receives the function under test and its analysis.
+ typedef std::function<void(MachineFunction&, AnalysisType&)> TestFx;
+
+ TestPassT() {
+ // Never called directly; tests must construct via
+ // PM.add(new TestPassT<AnalysisType>(T, ShouldPass)).
+ abort();
+ }
+
+ TestPassT(TestFx T, bool ShouldPass)
+ : T(T), ShouldPass(ShouldPass) {
+ initializeTestPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ AnalysisType &A = getAnalysis<AnalysisType>();
+ T(MF, A);
+ // Verify the function after the callback ran; AbortOnError=false so a
+ // verifier failure surfaces as a gtest failure instead of aborting.
+ bool VerifyResult = MF.verify(this, /* Banner=*/nullptr,
+ /*OS=*/&llvm::errs(),
+ /* AbortOnError=*/false);
+ EXPECT_EQ(VerifyResult, ShouldPass);
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<AnalysisType>();
+ AU.addPreserved<AnalysisType>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ TestFx T; // User-provided test body.
+ bool ShouldPass; // Expected machine-verifier outcome.
+};
+
+// Drive a single test: initialize LLVM, build the AMDGPU target machine
+// (skipping the test if unavailable), parse the given MIR, and run the
+// callback T inside a TestPassT<AnalysisType> under the legacy PassManager.
+template <typename AnalysisType>
+static void doTest(StringRef MIRFunc,
+ typename TestPassT<AnalysisType>::TestFx T,
+ bool ShouldPass = true) {
+ initLLVM();
+
+ LLVMContext Context;
+ std::unique_ptr<TargetMachine> TM = createTargetMachine();
+ if (!TM)
+ GTEST_SKIP() << "AMDGPU target not available";
+
+ legacy::PassManager PM;
+ std::unique_ptr<MIRParser> MIR;
+ std::unique_ptr<Module> M = parseMIR(Context, PM, MIR, *TM, MIRFunc);
+ ASSERT_TRUE(M);
+
+ // PM takes ownership of the heap-allocated pass.
+ PM.add(new TestPassT<AnalysisType>(T, ShouldPass));
+
+ PM.run(*M);
+}
+
+// Convenience wrapper for LiveIntervals-based tests: wraps the caller's MIR
+// body (which must start inside bb.0) in a boilerplate kernel skeleton that
+// pre-declares %0 as vgpr_32, then runs it through doTest.
+static void liveIntervalsTest(StringRef MIRFunc,
+ TestPassT<LiveIntervalsWrapperPass>::TestFx T,
+ bool ShouldPass = true) {
+ SmallString<512> S;
+ StringRef MIRString = (Twine(R"MIR(
+--- |
+ define amdgpu_kernel void @func() { ret void }
+...
+---
+name: func
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+body: |
+ bb.0:
+)MIR") + Twine(MIRFunc) + Twine("...\n")).toNullTerminatedStringRef(S);
+
+ doTest<LiveIntervalsWrapperPass>(MIRString, T, ShouldPass);
+}
+
+//===----------------------------------------------------------------------===//
+// Test 1: Insert new definition and verify SSA repair with PHI insertion
+//===----------------------------------------------------------------------===//
+
+TEST(MachineLaneSSAUpdaterTest, NewDefInsertsPhiAndRewritesUses) {
+ liveIntervalsTest(R"MIR(
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ %1:vgpr_32 = V_ADD_U32_e32 %0, %0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+
+ bb.2:
+ successors: %bb.4
+ %2:vgpr_32 = V_ADD_U32_e32 %1, %1, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ successors: %bb.4
+ S_NOP 0
+
+ bb.4:
+ %5:vgpr_32 = V_ADD_U32_e32 %1, %1, implicit $exec
+ S_ENDPGM 0
+)MIR",
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Verify we have 5 blocks as expected
+ ASSERT_EQ(MF.size(), 5u) << "Should have bb.0, bb.1, bb.2, bb.3, bb.4";
+
+ MachineBasicBlock *BB1 = MF.getBlockNumbered(1);
+ MachineBasicBlock *BB3 = MF.getBlockNumbered(3);
+ MachineBasicBlock *BB4 = MF.getBlockNumbered(4);
+
+ // Get %1 which is defined in bb.1 (first non-PHI instruction)
+ MachineInstr *OrigDefMI = &*BB1->getFirstNonPHI();
+ ASSERT_TRUE(OrigDefMI) << "Could not find instruction in bb.1";
+ ASSERT_TRUE(OrigDefMI->getNumOperands() > 0) << "Instruction has no operands";
+
+ Register OrigReg = OrigDefMI->getOperand(0).getReg();
+ ASSERT_TRUE(OrigReg.isValid()) << "Could not get destination register %1 from bb.1";
+
+ // Count uses before SSA repair
+ unsigned UseCountBefore = 0;
+ for (const MachineInstr &MI : MRI.use_instructions(OrigReg)) {
+ (void)MI;
+ ++UseCountBefore;
+ }
+ ASSERT_GT(UseCountBefore, 0u) << "Original register should have uses";
+
+ // Find V_MOV_B32_e32 instruction in bb.0 to get its opcode
+ MachineBasicBlock *BB0 = MF.getBlockNumbered(0);
+ MachineInstr *MovInst = &*BB0->begin();
+ unsigned MovOpcode = MovInst->getOpcode();
+ // Operand layout of V_MOV_B32_e32 here: (0) dst, (1) imm, (2) implicit exec.
+ Register ExecReg = MovInst->getOperand(2).getReg(); // Get EXEC register
+
+ // Create a new definition in bb.3 that defines OrigReg (violating SSA)
+ // This creates a scenario where bb.4 needs a PHI to merge values from bb.2 and bb.3
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto InsertPt = BB3->getFirstNonPHI();
+ MachineInstr *NewDefMI = BuildMI(*BB3, InsertPt, DebugLoc(),
+ TII->get(MovOpcode), OrigReg)
+ .addImm(42)
+ .addReg(ExecReg, RegState::Implicit);
+
+ // NOTE(review): unlike the subregister tests, NewDefMI is not inserted into
+ // SlotIndexes (no LIS.InsertMachineInstrInMaps call here) — presumably
+ // repairSSAForNewDef() indexes a full-register def itself; confirm.
+
+ // Set MachineFunction properties to allow PHIs and indicate SSA form
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // NOW TEST MachineLaneSSAUpdater: call repairSSAForNewDef
+ // Before: %1 defined in bb.1, used in bb.2 and bb.4
+ // NewDefMI in bb.3 also defines %1 (violating SSA!)
+ // After repair: NewDefMI will define a new vreg, bb.4 gets PHI
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
+
+ // VERIFY RESULTS:
+
+ // 1. NewReg should be valid and different from OrigReg
+ EXPECT_TRUE(NewReg.isValid()) << "Updater should create a new register";
+ EXPECT_NE(NewReg, OrigReg) << "New register should be different from original";
+
+ // 2. NewDefMI should now define NewReg (not OrigReg)
+ EXPECT_EQ(NewDefMI->getOperand(0).getReg(), NewReg) << "NewDefMI should now define the new register";
+
+
+ // 3. Check if PHI nodes were inserted in bb.4
+ bool FoundPHI = false;
+ for (MachineInstr &MI : *BB4) {
+ if (MI.isPHI()) {
+ FoundPHI = true;
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundPHI) << "SSA repair should have inserted PHI node in bb.4";
+
+ // 4. Verify LiveIntervals are still valid
+ EXPECT_TRUE(LIS.hasInterval(NewReg)) << "New register should have live interval";
+ EXPECT_TRUE(LIS.hasInterval(OrigReg)) << "Original register should still have live interval";
+
+ // Note: MachineFunction verification happens in TestPassT::runOnMachineFunction
+ // If verification fails, print the MachineFunction for debugging
+ if (!MF.verify(nullptr, /* Banner=*/nullptr, /*OS=*/nullptr, /* AbortOnError=*/false)) {
+ llvm::errs() << "MachineFunction verification failed after SSA repair:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Test 2: Multiple PHI insertions in nested control flow
+//
+// CFG structure (from user's diagram):
+// bb.0
+// |
+// bb.1 (%1 = original def)
+// / \
+// bb.2 bb.3
+// | / \
+// | bb.4 bb.5 (new def inserted here)
+// | \ /
+// | bb.6 (needs first PHI: %X = PHI %1,bb.4 NewDef,bb.5)
+// \ /
+// bb.7 (needs second PHI: %Y = PHI %1,bb.2 %X,bb.6)
+// |
+// bb.8 (use)
+//
+// Key insight: IDF(bb.5) = {bb.6, bb.7}
+// - bb.6 needs PHI because it's reachable from bb.4 (has %1) and bb.5 (has new def)
+// - bb.7 needs PHI because it's reachable from bb.2 (has %1) and bb.6 (has PHI result %X)
+//
+// This truly requires TWO PHI nodes for proper SSA form!
+//===----------------------------------------------------------------------===//
+
+TEST(MachineLaneSSAUpdaterTest, MultiplePhiInsertion) {
+ liveIntervalsTest(R"MIR(
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ %1:vgpr_32 = V_ADD_U32_e32 %0, %0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+
+ bb.2:
+ successors: %bb.7
+ %2:vgpr_32 = V_ADD_U32_e32 %1, %1, implicit $exec
+ S_BRANCH %bb.7
+
+ bb.3:
+ successors: %bb.4, %bb.5
+ $sgpr2 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr2, $sgpr3, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.5, implicit $scc
+
+ bb.4:
+ successors: %bb.6
+ %3:vgpr_32 = V_ADD_U32_e32 %1, %1, implicit $exec
+ S_BRANCH %bb.6
+
+ bb.5:
+ successors: %bb.6
+ S_NOP 0
+
+ bb.6:
+ successors: %bb.7
+ %4:vgpr_32 = V_SUB_U32_e32 %1, %1, implicit $exec
+
+ bb.7:
+ successors: %bb.8
+ %5:vgpr_32 = V_AND_B32_e32 %1, %1, implicit $exec
+ S_BRANCH %bb.8
+
+ bb.8:
+ %6:vgpr_32 = V_OR_B32_e32 %1, %1, implicit $exec
+ S_ENDPGM 0
+)MIR",
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Verify we have the expected number of blocks
+ ASSERT_EQ(MF.size(), 9u) << "Should have bb.0 through bb.8";
+
+ MachineBasicBlock *BB1 = MF.getBlockNumbered(1);
+ MachineBasicBlock *BB5 = MF.getBlockNumbered(5);
+ MachineBasicBlock *BB6 = MF.getBlockNumbered(6);
+ MachineBasicBlock *BB7 = MF.getBlockNumbered(7);
+
+ // Get %1 which is defined in bb.1
+ MachineInstr *OrigDefMI = &*BB1->getFirstNonPHI();
+ Register OrigReg = OrigDefMI->getOperand(0).getReg();
+ ASSERT_TRUE(OrigReg.isValid()) << "Could not get original register";
+
+ // Count uses of %1 before SSA repair
+ unsigned UseCountBefore = 0;
+ for (const MachineInstr &MI : MRI.use_instructions(OrigReg)) {
+ (void)MI;
+ ++UseCountBefore;
+ }
+ ASSERT_GT(UseCountBefore, 0u) << "Original register should have uses";
+ llvm::errs() << "Original register has " << UseCountBefore << " uses before SSA repair\n";
+
+ // Get V_MOV opcode from bb.0
+ MachineBasicBlock *BB0 = MF.getBlockNumbered(0);
+ MachineInstr *MovInst = &*BB0->begin();
+ unsigned MovOpcode = MovInst->getOpcode();
+ // Operand layout of V_MOV_B32_e32 here: (0) dst, (1) imm, (2) implicit exec.
+ Register ExecReg = MovInst->getOperand(2).getReg();
+
+ // Insert new definition in bb.5 that defines OrigReg (violating SSA)
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto InsertPt = BB5->getFirstNonPHI();
+ MachineInstr *NewDefMI = BuildMI(*BB5, InsertPt, DebugLoc(),
+ TII->get(MovOpcode), OrigReg)
+ .addImm(100)
+ .addReg(ExecReg, RegState::Implicit);
+
+ // Set MachineFunction properties
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // Call MachineLaneSSAUpdater
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
+
+ EXPECT_TRUE(NewReg.isValid()) << "Updater should create a new register";
+ EXPECT_NE(NewReg, OrigReg) << "New register should be different from original";
+ EXPECT_EQ(NewDefMI->getOperand(0).getReg(), NewReg) << "NewDefMI should now define the new register";
+
+ // Count PHI nodes inserted (per-block placement is checked explicitly for
+ // bb.6 and bb.7 below, so only the total is tracked here).
+ unsigned PHICount = 0;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.isPHI()) {
+ ++PHICount;
+ llvm::errs() << "Found PHI in BB#" << MBB.getNumber() << ": ";
+ MI.print(llvm::errs());
+ }
+ }
+ }
+
+ llvm::errs() << "Total PHI nodes inserted: " << PHICount << "\n";
+
+ // Check for first PHI in bb.6 (joins bb.4 and bb.5)
+ bool FoundPHIInBB6 = false;
+ for (MachineInstr &MI : *BB6) {
+ if (MI.isPHI()) {
+ FoundPHIInBB6 = true;
+ llvm::errs() << "First PHI in bb.6: ";
+ MI.print(llvm::errs());
+ // Verify it has 2 incoming values (4 operands: 2 x (reg, mbb))
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "First PHI in bb.6 should have 2 incoming values (from bb.4 and bb.5)";
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundPHIInBB6) << "Should have first PHI in bb.6 (joins bb.4 with %1 and bb.5 with new def)";
+
+ // Check for second PHI in bb.7 (joins bb.2 and bb.6)
+ bool FoundPHIInBB7 = false;
+ for (MachineInstr &MI : *BB7) {
+ if (MI.isPHI()) {
+ FoundPHIInBB7 = true;
+ llvm::errs() << "Second PHI in bb.7: ";
+ MI.print(llvm::errs());
+ // Verify it has 2 incoming values (4 operands: 2 x (reg, mbb))
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "Second PHI in bb.7 should have 2 incoming values (from bb.2 with %1 and bb.6 with first PHI result)";
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundPHIInBB7) << "Should have second PHI in bb.7 (joins bb.2 with %1 and bb.6 with first PHI)";
+
+ // Should have exactly 2 PHIs
+ EXPECT_EQ(PHICount, 2u) << "Should have inserted exactly TWO PHI nodes (one at bb.6, one at bb.7)";
+
+ // Verify LiveIntervals are valid
+ EXPECT_TRUE(LIS.hasInterval(NewReg)) << "New register should have live interval";
+ EXPECT_TRUE(LIS.hasInterval(OrigReg)) << "Original register should have live interval";
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Test 3: Subregister lane tracking with partial register updates
+//
+// This tests the "LaneAware" part of MachineLaneSSAUpdater.
+//
+// Scenario:
+// - Start with a 64-bit register %1 (has sub0 and sub1 lanes)
+// - Insert a new definition that only updates sub0 (lower 32 bits)
+// - The SSA updater should:
+// 1. Track that only sub0 lane is modified (not sub1)
+// 2. Create PHI that merges only the sub0 lane
+// 3. Preserve the original sub1 lane
+//
+// CFG:
+// bb.0
+// |
+// bb.1 (%1 = 64-bit def, both lanes)
+// / \
+// bb.2 bb.3 (new def updates only %X.sub0)
+// \ /
+// bb.4 (needs PHI for sub0 lane only)
+// |
+// bb.5 (use both lanes)
+//===----------------------------------------------------------------------===//
+
+TEST(MachineLaneSSAUpdaterTest, SubregisterLaneTracking) {
+ liveIntervalsTest(R"MIR(
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ ; Create vregs in order: %1, %2, %3
+ %1:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+ %3:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+
+ bb.2:
+ successors: %bb.4
+ ; Use sub0 lane only
+ %4:vgpr_32 = V_ADD_U32_e32 %3.sub0, %3.sub0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ successors: %bb.4
+ S_NOP 0
+
+ bb.4:
+ successors: %bb.5
+ ; Use both sub0 and sub1 lanes separately
+ %5:vgpr_32 = V_ADD_U32_e32 %3.sub0, %3.sub1, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ ; Use full 64-bit register (tests REG_SEQUENCE path after PHI)
+ %6:vreg_64 = V_LSHLREV_B64_e64 0, %3, implicit $exec
+ S_ENDPGM 0
+)MIR",
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Verify we have the expected number of blocks
+ ASSERT_EQ(MF.size(), 6u) << "Should have bb.0 through bb.5";
+
+ MachineBasicBlock *BB3 = MF.getBlockNumbered(3);
+
+ // Get the 64-bit register %3 (vreg_64) from the MIR
+ Register Reg64 = Register::index2VirtReg(3);
+ ASSERT_TRUE(Reg64.isValid()) << "Register %3 should be valid";
+
+ const TargetRegisterClass *RC64 = MRI.getRegClass(Reg64);
+ ASSERT_EQ(TRI->getRegSizeInBits(*RC64), 64u) << "Register %3 should be 64-bit";
+ llvm::errs() << "Using 64-bit register: %" << Reg64.virtRegIndex() << " (raw: " << Reg64 << ")\n";
+
+ // Verify it has subranges for lane tracking
+ ASSERT_TRUE(LIS.hasInterval(Reg64)) << "Register should have live interval";
+ LiveInterval &LI = LIS.getInterval(Reg64);
+ if (LI.hasSubRanges()) {
+ llvm::errs() << "Register has subranges (lane tracking active)\n";
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ llvm::errs() << " Lane mask: " << PrintLaneMask(SR.LaneMask) << "\n";
+ }
+ } else {
+ llvm::errs() << "Warning: Register does not have subranges\n";
+ }
+
+ // Find the subreg index for a 32-bit subreg of the 64-bit register.
+ // Valid sub-register indices are 1 .. getNumSubRegIndices()-1 (0 means
+ // "no subregister"), so the scan must stay strictly below E.
+ unsigned Sub0Idx = 0;
+ for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
+ const TargetRegisterClass *SubRC = TRI->getSubRegisterClass(RC64, Idx);
+ if (SubRC && TRI->getRegSizeInBits(*SubRC) == 32) {
+ Sub0Idx = Idx;
+ break;
+ }
+ }
+ ASSERT_NE(Sub0Idx, 0u) << "Could not find 32-bit subregister index";
+ LaneBitmask Sub0Mask = TRI->getSubRegIndexLaneMask(Sub0Idx);
+ llvm::errs() << "Sub0 index=" << Sub0Idx << " (" << TRI->getSubRegIndexName(Sub0Idx)
+ << "), mask=" << PrintLaneMask(Sub0Mask) << "\n";
+
+ // Insert new definition in bb.3 that defines Reg64.sub0 (partial update, violating SSA)
+ // Use V_MOV with immediate - no liveness dependencies
+ // It's the caller's responsibility to ensure source operands are valid
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto InsertPt = BB3->getFirstNonPHI();
+
+ // Get V_MOV opcode and EXEC register from bb.0
+ MachineBasicBlock *BB0 = MF.getBlockNumbered(0);
+ MachineInstr *MovInst = &*BB0->begin();
+ unsigned MovOpcode = MovInst->getOpcode();
+ Register ExecReg = MovInst->getOperand(2).getReg();
+
+ // Create a 32-bit temporary register
+ Register TempReg = MRI.createVirtualRegister(TRI->getSubRegisterClass(RC64, Sub0Idx));
+
+ // Insert both instructions first (V_MOV and COPY)
+ MachineInstr *TempMI = BuildMI(*BB3, InsertPt, DebugLoc(), TII->get(MovOpcode), TempReg)
+ .addImm(99)
+ .addReg(ExecReg, RegState::Implicit);
+
+ MachineInstr *NewDefMI = BuildMI(*BB3, InsertPt, DebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(Reg64, RegState::Define, Sub0Idx) // %3.sub0 = (violates SSA)
+ .addReg(TempReg); // COPY from temp
+
+ // Caller's responsibility: index instructions and create live intervals
+ // Do this AFTER inserting both instructions so uses are visible
+ LIS.InsertMachineInstrInMaps(*TempMI);
+ LIS.InsertMachineInstrInMaps(*NewDefMI);
+ LIS.createAndComputeVirtRegInterval(TempReg);
+
+ // Set MachineFunction properties
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // Call MachineLaneSSAUpdater to repair the SSA violation
+ // This should create a new vreg for the subreg def and insert lane-aware PHIs
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, Reg64);
+
+ llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << " (raw: " << NewReg << ")\n";
+
+ // VERIFY RESULTS:
+
+ // 1. NewReg should be a 32-bit register (for sub0), not 64-bit
+ EXPECT_TRUE(NewReg.isValid()) << "Updater should create a new register";
+ EXPECT_NE(NewReg, Reg64) << "New register should be different from original";
+
+ const TargetRegisterClass *NewRC = MRI.getRegClass(NewReg);
+ EXPECT_EQ(TRI->getRegSizeInBits(*NewRC), 32u) << "New register should be 32-bit (subreg class)";
+
+ // 2. NewDefMI should now define NewReg (not Reg64.sub0)
+ EXPECT_EQ(NewDefMI->getOperand(0).getReg(), NewReg) << "NewDefMI should now define new 32-bit register";
+ EXPECT_EQ(NewDefMI->getOperand(0).getSubReg(), 0u) << "NewDefMI should no longer have subreg index";
+
+ // 3. Verify PHIs were inserted where needed
+ MachineBasicBlock *BB4 = MF.getBlockNumbered(4);
+ bool FoundPHI = false;
+ for (MachineInstr &MI : *BB4) {
+ if (MI.isPHI()) {
+ FoundPHI = true;
+ llvm::errs() << "Found PHI in bb.4: ";
+ MI.print(llvm::errs());
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundPHI) << "Should have inserted PHI for sub0 lane in bb.4";
+
+ // 4. Verify LiveIntervals are valid
+ EXPECT_TRUE(LIS.hasInterval(NewReg)) << "New register should have live interval";
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Test 4: Subreg def → Full register PHI (REG_SEQUENCE before PHI)
+//
+// This tests the critical case where:
+// - Input MIR has a PHI that expects full 64-bit register from both paths
+// - We insert a subreg definition (X.sub0) on one path
+// - The updater must build a REG_SEQUENCE before the PHI to combine:
+// NewSubreg (sub0) + OriginalReg.sub1 → FullReg for PHI
+//
+// CFG:
+// bb.0 (entry)
+// |
+// bb.1 (X=1, full 64-bit def)
+// / \
+// bb.2 bb.3
+// (Y=2) / \
+// | bb.4 bb.5 (NEW DEF: X.sub0 = 3) ← inserted by test
+// | \ /
+// | bb.6 (first join: bb.4 + bb.5, may need REG_SEQUENCE)
+// | /
+// \ /
+// bb.7 (second join: PHI Z = PHI(Y, bb.2, X, bb.6)) ← already in input MIR
+// |
+// bb.8 (use Z)
+//
+// Expected: REG_SEQUENCE in bb.6 before branching to bb.7
+//===----------------------------------------------------------------------===//
+
+TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
+ liveIntervalsTest(R"MIR(
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ ; X = 1 (full 64-bit register)
+ %1:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 11, implicit $exec
+ %3:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+
+ bb.2:
+ successors: %bb.7
+ ; Y = 2 (full 64-bit register, different from X)
+ %4:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+ %5:vgpr_32 = V_MOV_B32_e32 21, implicit $exec
+ %6:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
+ S_BRANCH %bb.7
+
+ bb.3:
+ successors: %bb.4, %bb.5
+ $sgpr2 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr2, $sgpr3, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.5, implicit $scc
+
+ bb.4:
+ successors: %bb.6
+ S_NOP 0
+ S_BRANCH %bb.6
+
+ bb.5:
+ successors: %bb.6
+ ; New def will be inserted here: X.sub0 = 3
+ S_NOP 0
+
+ bb.6:
+ successors: %bb.7
+ S_BRANCH %bb.7
+
+ bb.7:
+ ; PHI already in input MIR, expects full 64-bit from both paths
+ %7:vreg_64 = PHI %6:vreg_64, %bb.2, %3:vreg_64, %bb.6
+ S_BRANCH %bb.8
+
+ bb.8:
+ ; Use Z
+ %8:vreg_64 = V_LSHLREV_B64_e64 0, %7, implicit $exec
+ S_ENDPGM 0
+)MIR",
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ ASSERT_EQ(MF.size(), 9u) << "Should have bb.0 through bb.8";
+
+ MachineBasicBlock *BB5 = MF.getBlockNumbered(5); // New def inserted here
+ MachineBasicBlock *BB6 = MF.getBlockNumbered(6); // First join (bb.4 + bb.5)
+ MachineBasicBlock *BB7 = MF.getBlockNumbered(7); // PHI block (bb.2 + bb.6)
+
+ // Get register X (%3, the 64-bit register from bb.1)
+ Register RegX = Register::index2VirtReg(3);
+ ASSERT_TRUE(RegX.isValid()) << "Register %3 (X) should be valid";
+
+ const TargetRegisterClass *RC64 = MRI.getRegClass(RegX);
+ ASSERT_EQ(TRI->getRegSizeInBits(*RC64), 64u) << "Register X should be 64-bit";
+
+ // Find sub0 index (32-bit subregister).
+ // Valid sub-register indices are 1 .. getNumSubRegIndices()-1 (0 means
+ // "no subregister"), so the scan must stay strictly below E.
+ unsigned Sub0Idx = 0;
+ for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
+ const TargetRegisterClass *SubRC = TRI->getSubRegisterClass(RC64, Idx);
+ if (SubRC && TRI->getRegSizeInBits(*SubRC) == 32) {
+ Sub0Idx = Idx;
+ break;
+ }
+ }
+ ASSERT_NE(Sub0Idx, 0u) << "Could not find 32-bit subregister index";
+
+ // Insert new definition in bb.5: X.sub0 = 3
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto InsertPt = BB5->getFirstNonPHI();
+
+ // Get V_MOV opcode and EXEC register
+ MachineBasicBlock *BB0 = MF.getBlockNumbered(0);
+ MachineInstr *MovInst = &*BB0->begin();
+ unsigned MovOpcode = MovInst->getOpcode();
+ Register ExecReg = MovInst->getOperand(2).getReg();
+
+ // Create temporary register
+ Register TempReg = MRI.createVirtualRegister(TRI->getSubRegisterClass(RC64, Sub0Idx));
+
+ MachineInstr *TempMI = BuildMI(*BB5, InsertPt, DebugLoc(), TII->get(MovOpcode), TempReg)
+ .addImm(30)
+ .addReg(ExecReg, RegState::Implicit);
+
+ MachineInstr *NewDefMI = BuildMI(*BB5, InsertPt, DebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(RegX, RegState::Define, Sub0Idx) // X.sub0 =
+ .addReg(TempReg);
+
+ // Index instructions and create live interval for temp
+ LIS.InsertMachineInstrInMaps(*TempMI);
+ LIS.InsertMachineInstrInMaps(*NewDefMI);
+ LIS.createAndComputeVirtRegInterval(TempReg);
+
+ // Set MachineFunction properties
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // Call SSA updater
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, RegX);
+
+ llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << " (raw: " << NewReg << ")\n";
+
+ // VERIFY RESULTS:
+
+ // 1. New register should be 32-bit (subreg class)
+ EXPECT_TRUE(NewReg.isValid());
+ EXPECT_NE(NewReg, RegX);
+ const TargetRegisterClass *NewRC = MRI.getRegClass(NewReg);
+ EXPECT_EQ(TRI->getRegSizeInBits(*NewRC), 32u) << "New register should be 32-bit";
+
+ // 2. NewDefMI should now define NewReg without subreg index
+ EXPECT_EQ(NewDefMI->getOperand(0).getReg(), NewReg);
+ EXPECT_EQ(NewDefMI->getOperand(0).getSubReg(), 0u);
+
+ // 3. Check the existing PHI in bb.7
+ bool FoundPHI = false;
+ Register PHIReg;
+ MachineInstr *PHI = nullptr;
+ for (MachineInstr &MI : *BB7) {
+ if (MI.isPHI()) {
+ FoundPHI = true;
+ PHI = &MI;
+ PHIReg = MI.getOperand(0).getReg();
+ llvm::errs() << "PHI in bb.7 after SSA repair: ";
+ MI.print(llvm::errs());
+ break;
+ }
+ }
+ ASSERT_TRUE(FoundPHI) << "Should have PHI in bb.7 (from input MIR)";
+
+ // 4. CRITICAL: Check for REG_SEQUENCE in bb.6 (first join, before branch to PHI)
+ // The updater must build REG_SEQUENCE to provide full register to the PHI
+ bool FoundREGSEQ = false;
+ for (MachineInstr &MI : *BB6) {
+ if (MI.getOpcode() == TargetOpcode::REG_SEQUENCE) {
+ FoundREGSEQ = true;
+ llvm::errs() << "Found REG_SEQUENCE in bb.6: ";
+ MI.print(llvm::errs());
+
+ // Should combine new sub0 with original sub1
+ EXPECT_GE(MI.getNumOperands(), 5u) << "REG_SEQUENCE should have result + 2 source pairs";
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundREGSEQ) << "Should have built REG_SEQUENCE in bb.6 to provide full register to PHI in bb.7";
+
+ // 5. Verify LiveIntervals
+ EXPECT_TRUE(LIS.hasInterval(NewReg));
+ EXPECT_TRUE(LIS.hasInterval(PHIReg));
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+} // anonymous namespace
>From 5979b3c9a9dcea7ceb5a4262343a0a3a97db01a5 Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Thu, 9 Oct 2025 18:31:38 +0000
Subject: [PATCH 4/7] [CodeGen] MachineLaneSSAUpdater - added new unit tests
---
.../CodeGen/MachineLaneSSAUpdaterTest.cpp | 675 ++++++++++++++++++
1 file changed, 675 insertions(+)
diff --git a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
index 1ac94c14aa31e..61cbd8cc60c11 100644
--- a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
+++ b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
@@ -882,4 +882,679 @@ TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
});
}
+//===----------------------------------------------------------------------===//
+// Test 5: Loop with new def in loop body (PHI in loop header)
+//
+// This tests SSA repair when a new definition is inserted inside a loop,
+// requiring a PHI node in the loop header to merge:
+// - Entry path: original value from before the loop
+// - Back edge: new value from loop body
+//
+// CFG:
+// bb.0 (entry, X = 1)
+// |
+// v
+// bb.1 (loop header) ← PHI needed: %PHI = PHI(X, bb.0, NewReg, bb.2)
+// / \
+// / \
+// bb.2 bb.3 (loop exit, use X)
+// (loop
+// body,
+// new def)
+// |
+// └──→ bb.1 (back edge)
+//
+// Key test: Dominance-based PHI construction should correctly use NewReg
+// for the back edge operand since NewDefBB (bb.2) dominates the loop latch (bb.2).
+//===----------------------------------------------------------------------===//
+
+TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
+ liveIntervalsTest(R"MIR(
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; Original definition of %1 (before loop)
+ %1:vgpr_32 = V_ADD_U32_e32 %0, %0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ ; Loop header - PHI should be inserted here
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+
+ bb.2:
+ successors: %bb.1
+ ; Loop body - new def will be inserted here
+ %2:vgpr_32 = V_ADD_U32_e32 %1, %1, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.3:
+ ; Loop exit - use %1
+ %3:vgpr_32 = V_ADD_U32_e32 %1, %1, implicit $exec
+ S_ENDPGM 0
+)MIR",
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ ASSERT_EQ(MF.size(), 4u) << "Should have bb.0 through bb.3";
+
+ MachineBasicBlock *BB0 = MF.getBlockNumbered(0); // Entry with original def
+ MachineBasicBlock *BB1 = MF.getBlockNumbered(1); // Loop header
+ MachineBasicBlock *BB2 = MF.getBlockNumbered(2); // Loop body
+
+ // Get %1 (defined in bb.0, used in loop)
+ // Skip the first V_MOV instruction, get the V_ADD
+ auto It = BB0->begin();
+ ++It; // Skip %0 = V_MOV
+ MachineInstr *OrigDefMI = &*It;
+ Register OrigReg = OrigDefMI->getOperand(0).getReg();
+ ASSERT_TRUE(OrigReg.isValid()) << "Could not get original register";
+
+ llvm::errs() << "Original register: %" << OrigReg.virtRegIndex() << "\n";
+
+ // Insert new definition in loop body (bb.2)
+ // This violates SSA because %1 is defined both in bb.0 and bb.2
+ // Operand layout of V_MOV_B32_e32 here: (0) dst, (1) imm, (2) implicit exec.
+ MachineInstr *MovInst = &*BB0->begin();
+ unsigned MovOpcode = MovInst->getOpcode();
+ Register ExecReg = MovInst->getOperand(2).getReg();
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto InsertPt = BB2->getFirstNonPHI();
+ MachineInstr *NewDefMI = BuildMI(*BB2, InsertPt, DebugLoc(),
+ TII->get(MovOpcode), OrigReg)
+ .addImm(99)
+ .addReg(ExecReg, RegState::Implicit);
+
+ // NOTE(review): NewDefMI is not inserted into SlotIndexes here (no
+ // LIS.InsertMachineInstrInMaps call, unlike the subregister tests) —
+ // presumably repairSSAForNewDef() indexes a full-register def itself;
+ // confirm.
+
+ // Set MachineFunction properties
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // Call SSA updater
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
+
+ llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+
+ // VERIFY RESULTS:
+
+ // 1. NewReg should be valid and different from OrigReg
+ EXPECT_TRUE(NewReg.isValid());
+ EXPECT_NE(NewReg, OrigReg);
+
+ // 2. NewDefMI should now define NewReg
+ EXPECT_EQ(NewDefMI->getOperand(0).getReg(), NewReg);
+
+ // 3. PHI should be inserted in loop header (bb.1)
+ bool FoundPHIInHeader = false;
+ for (MachineInstr &MI : *BB1) {
+ if (MI.isPHI()) {
+ FoundPHIInHeader = true;
+ llvm::errs() << "Found PHI in loop header (bb.1): ";
+ MI.print(llvm::errs());
+
+ // Verify PHI has 2 incoming values
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "Loop header PHI should have 2 incoming values";
+
+ // Check the operands
+ // One should be from bb.0 (entry, using OrigReg)
+ // One should be from bb.2 (back edge, using NewReg)
+ bool HasEntryPath = false;
+ bool HasBackEdge = false;
+
+ // PHI operand layout: (0) def, then (reg, pred-MBB) pairs.
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ Register IncomingReg = MI.getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+
+ if (IncomingMBB == BB0) {
+ HasEntryPath = true;
+ EXPECT_EQ(IncomingReg, OrigReg) << "Entry path should use OrigReg";
+ llvm::errs() << " Entry path (bb.0): %" << IncomingReg.virtRegIndex() << "\n";
+ } else if (IncomingMBB == BB2) {
+ HasBackEdge = true;
+ EXPECT_EQ(IncomingReg, NewReg) << "Back edge should use NewReg";
+ llvm::errs() << " Back edge (bb.2): %" << IncomingReg.virtRegIndex() << "\n";
+ }
+ }
+
+ EXPECT_TRUE(HasEntryPath) << "PHI should have entry path from bb.0";
+ EXPECT_TRUE(HasBackEdge) << "PHI should have back edge from bb.2";
+
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundPHIInHeader) << "Should have inserted PHI in loop header (bb.1)";
+
+ // 4. Verify LiveIntervals are valid
+ EXPECT_TRUE(LIS.hasInterval(NewReg));
+ EXPECT_TRUE(LIS.hasInterval(OrigReg));
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Test 6: Complex loop with diamond CFG and use-before-def
+//
+// This is the most comprehensive test combining multiple SSA repair scenarios:
+// 1. Loop with existing PHI (induction variable)
+// 2. Use before redefinition (in loop header)
+// 3. New definition in one branch of if-then-else diamond
+// 4. PHI1 at diamond join
+// 5. PHI2 at loop header (merges entry value and PHI1 result from back edge)
+// 6. Use after diamond (in latch) should use PHI1 result
+//
+// CFG:
+// bb.0 (entry: X=1, i=0)
+// |
+// v
+// bb.1 (loop header)
+// PHI_i = PHI(0, bb.0, i+1, bb.5) [already in input MIR]
+// PHI2 = PHI(X, bb.0, PHI1, bb.5) [created by SSA updater]
+// USE X (before redef!) [rewritten to PHI2]
+// if (i < 10)
+// / \
+// bb.2 bb.3 (NEW DEF: X=99)
+// (then) (else)
+// | |
+// \ /
+// \/
+// bb.4 (diamond join)
+// PHI1 = PHI(X, bb.2, NewReg, bb.3) [created by SSA updater]
+// |
+// v
+// bb.5 (latch)
+// USE X [rewritten to PHI1]
+// i = i + 1
+// branch to bb.1
+// |
+// bb.6 (exit)
+// USE X
+//===----------------------------------------------------------------------===//
+
+TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
+ liveIntervalsTest(R"MIR(
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; X = 1 (the register we'll redefine in loop)
+ %1:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; i = 0 (induction variable)
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ ; Loop header with existing PHI for induction variable
+ %3:vgpr_32 = PHI %2:vgpr_32, %bb.0, %10:vgpr_32, %bb.5
+ ; USE X before redefinition - should be rewritten to PHI2
+ %4:vgpr_32 = V_ADD_U32_e32 %1, %1, implicit $exec
+ ; Check if i < 10
+ %5:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+
+ bb.2:
+ successors: %bb.4
+ ; Then branch - X unchanged
+ S_NOP 0
+ S_BRANCH %bb.4
+
+ bb.3:
+ successors: %bb.4
+ ; Else branch - NEW DEF will be inserted here: X = 99
+ S_NOP 0
+
+ bb.4:
+ successors: %bb.5
+ ; Diamond join - PHI1 should be created here
+ S_NOP 0
+
+ bb.5:
+ successors: %bb.1, %bb.6
+ ; Loop latch - USE X (should be rewritten to PHI1)
+ %8:vgpr_32 = V_SUB_U32_e32 %1, %1, implicit $exec
+ ; i = i + 1
+ %9:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e32 %3, %9, implicit $exec
+ ; Check loop condition
+ $sgpr2 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr2, $sgpr3, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.6, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.6:
+ ; Loop exit - USE X
+ %11:vgpr_32 = V_OR_B32_e32 %1, %1, implicit $exec
+ S_ENDPGM 0
+)MIR",
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ ASSERT_EQ(MF.size(), 7u) << "Should have bb.0 through bb.6";
+
+ MachineBasicBlock *BB0 = MF.getBlockNumbered(0); // Entry
+ MachineBasicBlock *BB1 = MF.getBlockNumbered(1); // Loop header
+ MachineBasicBlock *BB2 = MF.getBlockNumbered(2); // Then
+ MachineBasicBlock *BB3 = MF.getBlockNumbered(3); // Else (new def here)
+ MachineBasicBlock *BB4 = MF.getBlockNumbered(4); // Diamond join
+ MachineBasicBlock *BB5 = MF.getBlockNumbered(5); // Latch
+
+ // Get %1 (X, defined in bb.0)
+ auto It = BB0->begin();
+ ++It; // Skip %0 = V_MOV_B32_e32 0
+ MachineInstr *OrigDefMI = &*It; // %1 = V_MOV_B32_e32 1
+ Register OrigReg = OrigDefMI->getOperand(0).getReg();
+ ASSERT_TRUE(OrigReg.isValid()) << "Could not get original register X";
+
+ llvm::errs() << "Original register X: %" << OrigReg.virtRegIndex() << "\n";
+
+ // Find the use-before-def in bb.1 (loop header)
+ MachineInstr *UseBeforeDefMI = nullptr;
+ for (MachineInstr &MI : *BB1) {
+ if (!MI.isPHI() && MI.getOpcode() != TargetOpcode::IMPLICIT_DEF) {
+ // First non-PHI instruction should be V_ADD using %1
+ if (MI.getNumOperands() >= 3 && MI.getOperand(1).isReg() &&
+ MI.getOperand(1).getReg() == OrigReg) {
+ UseBeforeDefMI = &MI;
+ break;
+ }
+ }
+ }
+ ASSERT_TRUE(UseBeforeDefMI) << "Could not find use-before-def in loop header";
+ llvm::errs() << "Found use-before-def in bb.1: %"
+ << UseBeforeDefMI->getOperand(0).getReg().virtRegIndex() << "\n";
+
+ // Insert new definition in bb.3 (else branch): X = 99
+ MachineInstr *MovInst = &*BB0->begin();
+ unsigned MovOpcode = MovInst->getOpcode();
+ Register ExecReg = MovInst->getOperand(2).getReg();
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto InsertPt = BB3->getFirstNonPHI();
+ MachineInstr *NewDefMI = BuildMI(*BB3, InsertPt, DebugLoc(),
+ TII->get(MovOpcode), OrigReg)
+ .addImm(99)
+ .addReg(ExecReg, RegState::Implicit);
+
+ // Set MachineFunction properties
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // Call SSA updater
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
+
+ llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+
+ // VERIFY RESULTS:
+
+ // 1. NewReg should be valid and different from OrigReg
+ EXPECT_TRUE(NewReg.isValid());
+ EXPECT_NE(NewReg, OrigReg);
+ EXPECT_EQ(NewDefMI->getOperand(0).getReg(), NewReg);
+
+ // 2. PHI1 should exist in diamond join (bb.4)
+ bool FoundPHI1 = false;
+ Register PHI1Reg;
+ for (MachineInstr &MI : *BB4) {
+ if (MI.isPHI()) {
+ FoundPHI1 = true;
+ PHI1Reg = MI.getOperand(0).getReg();
+ llvm::errs() << "Found PHI1 in diamond join (bb.4): ";
+ MI.print(llvm::errs());
+
+ // Should have 2 incoming: OrigReg from bb.2, NewReg from bb.3
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "Diamond join PHI should have 2 incoming";
+
+ // Verify the incoming values match the header diagram:
+ // OrigReg flows in unchanged from bb.2 (then), NewReg from bb.3 (else).
+ bool HasThenPath = false;
+ bool HasElsePath = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ Register IncomingReg = MI.getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+
+ if (IncomingMBB == BB2) {
+ HasThenPath = true;
+ EXPECT_EQ(IncomingReg, OrigReg) << "Then path should use OrigReg";
+ } else if (IncomingMBB == BB3) {
+ HasElsePath = true;
+ EXPECT_EQ(IncomingReg, NewReg) << "Else path should use NewReg";
+ }
+ }
+ EXPECT_TRUE(HasThenPath) << "PHI1 should have incoming from bb.2 (then)";
+ EXPECT_TRUE(HasElsePath) << "PHI1 should have incoming from bb.3 (else)";
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundPHI1) << "Should have PHI1 in diamond join (bb.4)";
+
+ // 3. PHI2 should exist in loop header (bb.1)
+ // First, count all PHIs
+ unsigned TotalPHICount = 0;
+ for (MachineInstr &MI : *BB1) {
+ if (MI.isPHI())
+ TotalPHICount++;
+ }
+ llvm::errs() << "Total PHIs in loop header: " << TotalPHICount << "\n";
+ EXPECT_EQ(TotalPHICount, 2u) << "Loop header should have 2 PHIs (induction var + SSA repair)";
+
+ // Now find the SSA repair PHI (not the induction variable PHI %3)
+ bool FoundPHI2 = false;
+ Register PHI2Reg;
+ Register InductionVarPHI = Register::index2VirtReg(3); // %3 from input MIR
+ for (MachineInstr &MI : *BB1) {
+ if (MI.isPHI()) {
+ Register PHIResult = MI.getOperand(0).getReg();
+
+ // Skip the induction variable PHI (%3 from input MIR) when looking for SSA repair PHI
+ if (PHIResult == InductionVarPHI)
+ continue;
+
+ FoundPHI2 = true;
+ PHI2Reg = PHIResult;
+ llvm::errs() << "Found PHI2 (SSA repair) in loop header (bb.1): ";
+ MI.print(llvm::errs());
+
+ // Should have 2 incoming: OrigReg from bb.0, PHI1Reg from bb.5
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "Loop header PHI2 should have 2 incoming";
+
+ // Verify operands
+ bool HasEntryPath = false;
+ bool HasBackEdge = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ Register IncomingReg = MI.getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+
+ if (IncomingMBB == BB0) {
+ HasEntryPath = true;
+ EXPECT_EQ(IncomingReg, OrigReg) << "Entry path should use OrigReg";
+ } else if (IncomingMBB == BB5) {
+ HasBackEdge = true;
+ EXPECT_EQ(IncomingReg, PHI1Reg) << "Back edge should use PHI1 result";
+ }
+ }
+
+ EXPECT_TRUE(HasEntryPath) << "PHI2 should have entry path from bb.0";
+ EXPECT_TRUE(HasBackEdge) << "PHI2 should have back edge from bb.5";
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundPHI2) << "Should have PHI2 (SSA repair) in loop header (bb.1)";
+
+ // 4. Use-before-def in bb.1 should be rewritten to PHI2
+ EXPECT_EQ(UseBeforeDefMI->getOperand(1).getReg(), PHI2Reg)
+ << "Use-before-def should be rewritten to PHI2 result";
+ llvm::errs() << "Use-before-def correctly rewritten to PHI2: %"
+ << PHI2Reg.virtRegIndex() << "\n";
+
+ // 5. Use in latch (bb.5) should be rewritten to PHI1
+ // Find instruction using PHI1 (originally used %1)
+ bool FoundLatchUse = false;
+ for (MachineInstr &MI : *BB5) {
+ // Skip PHIs and branches
+ if (MI.isPHI() || MI.isBranch())
+ continue;
+
+ // Look for any instruction that uses PHI1Reg
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == PHI1Reg) {
+ llvm::errs() << "Latch use correctly rewritten to PHI1: %"
+ << PHI1Reg.virtRegIndex() << " in: ";
+ MI.print(llvm::errs());
+ FoundLatchUse = true;
+ break;
+ }
+ }
+ if (FoundLatchUse)
+ break;
+ }
+ EXPECT_TRUE(FoundLatchUse) << "Should find use of PHI1 in latch (bb.5)";
+
+ // 6. Verify LiveIntervals
+ EXPECT_TRUE(LIS.hasInterval(NewReg));
+ EXPECT_TRUE(LIS.hasInterval(PHI1Reg));
+ EXPECT_TRUE(LIS.hasInterval(PHI2Reg));
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+// Test: Multiple subreg redefinitions in loop (X.sub0 in one branch, then the
+// increment result's sub1 in the latch). This tests the most complex scenario:
+// two separate lane redefinitions with REG_SEQUENCE composition at the backedge.
+TEST(MachineLaneSSAUpdaterTest, MultipleSubregRedefsInLoop) {
+ SmallString<2048> S;
+ StringRef MIRString = (Twine(R"MIR(
+--- |
+ define amdgpu_kernel void @func() { ret void }
+...
+---
+name: func
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vreg_64 }
+body: |
+ bb.0:
+ successors: %bb.1
+ %1:vreg_64 = IMPLICIT_DEF
+
+ bb.1:
+ successors: %bb.2, %bb.5
+ %0:vreg_64 = PHI %1:vreg_64, %bb.0, %3:vreg_64, %bb.3
+ %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+ dead %4:vgpr_32 = V_ADD_U32_e32 %0.sub0:vreg_64, %2:vgpr_32, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.5
+
+ bb.2:
+ successors: %bb.3
+ dead %5:vgpr_32 = V_MOV_B32_e32 %0.sub1:vreg_64, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.5:
+ successors: %bb.3
+ dead %6:vgpr_32 = V_MOV_B32_e32 %0.sub0:vreg_64, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.1, %bb.4
+ %3:vreg_64 = V_LSHLREV_B64_e64 1, %0:vreg_64, implicit $exec
+ $sgpr2 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 10
+ S_CMP_LT_U32 $sgpr2, $sgpr3, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_ENDPGM 0
+...
+)MIR")).toNullTerminatedStringRef(S);
+
+ doTest<LiveIntervalsWrapperPass>(MIRString,
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ llvm::errs() << "\n=== MultipleSubregRedefsInLoop Test ===\n";
+
+ // Get the basic blocks we reference; skip the ones we do not touch
+ // (bb.0 entry, bb.2 true branch, bb.4 exit) to avoid unused variables.
+ auto BBI = MF.begin();
+ ++BBI; // Skip bb.0 (entry)
+ MachineBasicBlock *BB1 = &*BBI++; // Loop header
+ ++BBI; // Skip bb.2 (true branch, uses X.HI)
+ MachineBasicBlock *BB5 = &*BBI++; // False branch (uses X.LO, INSERT def X.LO)
+ MachineBasicBlock *BB3 = &*BBI++; // Latch (increment, INSERT def X.HI)
+ // bb.4 (exit) is not referenced by the checks below.
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Find the 64-bit register and its subregister indices
+ Register OrigReg = Register::index2VirtReg(0); // %0 from MIR
+ ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
+
+ const TargetRegisterClass *RC64 = MRI.getRegClass(OrigReg);
+ unsigned Sub0Idx = 0, Sub1Idx = 0;
+
+ // Find sub0 (low 32 bits) and sub1 (high 32 bits)
+ for (unsigned Idx = 1; Idx < TRI->getNumSubRegIndices(); ++Idx) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(Idx);
+ unsigned SubRegSize = TRI->getSubRegIdxSize(Idx);
+
+ if (SubRegSize == 32) {
+ if (Mask.getAsInteger() == 0x3) { // Low lanes
+ Sub0Idx = Idx;
+ } else if (Mask.getAsInteger() == 0xC) { // High lanes
+ Sub1Idx = Idx;
+ }
+ }
+ }
+
+ ASSERT_NE(Sub0Idx, 0u) << "Should find sub0 index";
+ ASSERT_NE(Sub1Idx, 0u) << "Should find sub1 index";
+
+ llvm::errs() << "Using 64-bit register: %" << OrigReg.virtRegIndex()
+ << " with sub0=" << Sub0Idx << ", sub1=" << Sub1Idx << "\n";
+
+ // Get V_MOV opcode and EXEC register from existing instruction
+ MachineInstr *MovInst = nullptr;
+ Register ExecReg;
+ for (MachineInstr &MI : *BB1) {
+ if (!MI.isPHI() && MI.getNumOperands() >= 3 && MI.getOperand(2).isReg()) {
+ MovInst = &MI;
+ ExecReg = MI.getOperand(2).getReg();
+ break;
+ }
+ }
+ ASSERT_NE(MovInst, nullptr) << "Should find V_MOV in BB1";
+ unsigned MovOpcode = MovInst->getOpcode();
+
+ // === FIRST INSERTION: X.sub0 in BB5 (else branch) ===
+ llvm::errs() << "\n=== First insertion: X.sub0 in BB5 ===\n";
+
+ // Find insertion point in BB5 (after the use of X.sub0)
+ MachineInstr *InsertPoint1 = nullptr;
+ for (MachineInstr &MI : *BB5) {
+ if (MI.isBranch()) {
+ InsertPoint1 = &MI;
+ break;
+ }
+ }
+ ASSERT_NE(InsertPoint1, nullptr) << "Should find branch in BB5";
+
+ // Create first new def: X.sub0 = 99
+ MachineInstrBuilder MIB1 = BuildMI(*BB5, InsertPoint1, DebugLoc(),
+ TII->get(MovOpcode))
+ .addReg(OrigReg, RegState::Define, Sub0Idx)
+ .addImm(99)
+ .addReg(ExecReg, RegState::Implicit);
+
+ MachineInstr &NewDefMI1 = *MIB1;
+ llvm::errs() << "Created first def in BB5: ";
+ NewDefMI1.print(llvm::errs());
+
+ // Create SSA updater and repair after first insertion
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg1 = Updater.repairSSAForNewDef(NewDefMI1, OrigReg);
+
+ llvm::errs() << "SSA repair #1 created new register: %" << NewReg1.virtRegIndex() << "\n";
+
+ // === SECOND INSERTION: X.sub1 in BB3 (after increment) ===
+ llvm::errs() << "\n=== Second insertion: X.sub1 in BB3 (after increment) ===\n";
+
+ // Find the increment instruction in BB3 (look for vreg_64 def)
+ MachineInstr *IncrementMI = nullptr;
+ Register IncrementReg;
+ for (MachineInstr &MI : *BB3) {
+ if (!MI.isPHI() && MI.getNumOperands() > 0 && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).isDef()) {
+ Register DefReg = MI.getOperand(0).getReg();
+ if (DefReg.isVirtual() && DefReg == Register::index2VirtReg(3)) {
+ IncrementMI = &MI;
+ IncrementReg = DefReg; // This is %3
+ llvm::errs() << "Found increment: ";
+ MI.print(llvm::errs());
+ break;
+ }
+ }
+ }
+ ASSERT_NE(IncrementMI, nullptr) << "Should find increment (def of %3) in BB3";
+ ASSERT_TRUE(IncrementReg.isValid()) << "Increment register should be valid";
+
+ // Create second new def: %3.sub1 = 200 (redefine increment result's sub1)
+ MachineBasicBlock::iterator InsertPoint2 = std::next(IncrementMI->getIterator());
+ MachineInstrBuilder MIB2 = BuildMI(*BB3, InsertPoint2, DebugLoc(),
+ TII->get(MovOpcode))
+ .addReg(IncrementReg, RegState::Define, Sub1Idx) // Redefine %3.sub1, not %0.sub1!
+ .addImm(200)
+ .addReg(ExecReg, RegState::Implicit);
+
+ MachineInstr &NewDefMI2 = *MIB2;
+ llvm::errs() << "Created second def in BB3 (redefining %3.sub1): ";
+ NewDefMI2.print(llvm::errs());
+
+ // Repair SSA after second insertion (for %3, the increment result)
+ Register NewReg2 = Updater.repairSSAForNewDef(NewDefMI2, IncrementReg);
+
+ llvm::errs() << "SSA repair #2 created new register: %" << NewReg2.virtRegIndex() << "\n";
+
+ // === Verification ===
+ llvm::errs() << "\n=== Verification ===\n";
+
+ // Print final MIR
+ llvm::errs() << "Final BB3 (latch):\n";
+ for (MachineInstr &MI : *BB3) {
+ MI.print(llvm::errs());
+ }
+
+ // 1. Should have PHI for 32-bit X.sub0 at BB3 (diamond join)
+ bool FoundSub0PHI = false;
+ for (MachineInstr &MI : *BB3) {
+ if (MI.isPHI()) {
+ Register PHIResult = MI.getOperand(0).getReg();
+ if (PHIResult != Register::index2VirtReg(3)) { // Not the increment result PHI
+ FoundSub0PHI = true;
+ llvm::errs() << "Found sub0 PHI in BB3: ";
+ MI.print(llvm::errs());
+ }
+ }
+ }
+
+ // The flag must actually be asserted, otherwise this check is a no-op.
+ EXPECT_TRUE(FoundSub0PHI) << "Should have sub0 PHI in BB3 (diamond join)";
+
+ // 2. Should have REG_SEQUENCE in BB3 before backedge to compose full 64-bit
+ bool FoundREGSEQ = false;
+ for (MachineInstr &MI : *BB3) {
+ if (MI.getOpcode() == TargetOpcode::REG_SEQUENCE) {
+ FoundREGSEQ = true;
+ llvm::errs() << "Found REG_SEQUENCE in BB3: ";
+ MI.print(llvm::errs());
+
+ // Verify it composes both lanes
+ unsigned NumSources = (MI.getNumOperands() - 1) / 2;
+ EXPECT_GE(NumSources, 2u) << "REG_SEQUENCE should have at least 2 sources (sub0 and sub1)";
+ }
+ }
+
+ EXPECT_TRUE(FoundREGSEQ) << "Should have REG_SEQUENCE at backedge in BB3";
+
+ // 3. Verify LiveIntervals
+ EXPECT_TRUE(LIS.hasInterval(NewReg1));
+ EXPECT_TRUE(LIS.hasInterval(NewReg2));
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
} // anonymous namespace
>From 99370ca7dadb96caac1453d5bf8d6f088f7584ca Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Fri, 10 Oct 2025 21:10:03 +0000
Subject: [PATCH 5/7] [CodeGen] MachineLaneSSAUpdater - all unit tests for all
cases of the usual new-definition handling finished
---
.../llvm/CodeGen/MachineLaneSSAUpdater.h | 3 +-
llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp | 181 +++-
.../CodeGen/MachineLaneSSAUpdaterTest.cpp | 929 +++++++++++++++++-
3 files changed, 1038 insertions(+), 75 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
index 2c49c63f6b3ff..4f5b7340b179d 100644
--- a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
@@ -186,7 +186,8 @@ class MachineLaneSSAUpdater {
// Helper: Create PHI in a specific block with per-edge lane analysis
Register createPHIInBlock(MachineBasicBlock &JoinMBB,
Register OrigVReg,
- Register NewVReg);
+ Register NewVReg,
+ LaneBitmask DefMask);
// Rewrite dominated uses of OrigVReg to NewSSA according to the
// exact/subset/super policy; create REG_SEQUENCE only when needed.
diff --git a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
index 9bcb4e52fe44a..64c2b5d1706c5 100644
--- a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
@@ -355,14 +355,22 @@ void MachineLaneSSAUpdater::performSSARepair(Register NewVReg, Register OrigVReg
PHILI.RenumberValues();
}
- // Also renumber original interval if it was modified
- LiveInterval &OrigLI = LIS.getInterval(OrigVReg);
- OrigLI.RenumberValues();
-
- // Recompute OrigVReg's LiveInterval after rewriting uses
- // Some uses may have been rewritten to NewVReg or PHI registers,
- // so OrigVReg's live range may need to shrink
- LIS.shrinkToUses(&OrigLI);
+ // Recompute OrigVReg's LiveInterval to account for PHI operands
+ // We do a full recomputation because PHI operands may reference subregisters
+ // that weren't previously live on those paths, and we need to extend liveness
+ // from the definition to the PHI use.
+ LIS.removeInterval(OrigVReg);
+ LIS.createAndComputeVirtRegInterval(OrigVReg);
+
+ // Note: We do NOT call shrinkToUses on OrigVReg even after recomputation because:
+ // shrinkToUses has a fundamental bug with PHI operands - it doesn't understand
+ // that PHI operands require their source lanes to be live at the END of
+ // predecessor blocks. When it sees a PHI operand like "%0.sub2_sub3" from BB3,
+ // it only considers the PHI location (start of join block), not the predecessor
+ // end where the value must be available. This causes it to incorrectly shrink
+ // away lanes that ARE needed by PHI operands, leading to verification errors:
+ // "Not all lanes of PHI source live at use". The createAndComputeVirtRegInterval
+ // already produces correct, minimal liveness that includes PHI uses properly.
// Step 4: Update operand flags to match the LiveIntervals
updateDeadFlags(NewVReg);
@@ -587,7 +595,7 @@ SmallVector<Register> MachineLaneSSAUpdater::insertLaneAwarePHI(Register Initial
<< " with CurrentNewVReg=" << CurrentNewVReg << "\n");
// Create PHI: merges OrigVReg and CurrentNewVReg based on dominance
- Register PHIResult = createPHIInBlock(*JoinMBB, OrigVReg, CurrentNewVReg);
+ Register PHIResult = createPHIInBlock(*JoinMBB, OrigVReg, CurrentNewVReg, DefMask);
if (PHIResult.isValid()) {
AllCreatedPHIs.push_back(PHIResult);
@@ -610,22 +618,23 @@ SmallVector<Register> MachineLaneSSAUpdater::insertLaneAwarePHI(Register Initial
// Helper: Create lane-specific PHI in a join block
Register MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
Register OrigVReg,
- Register NewVReg) {
+ Register NewVReg,
+ LaneBitmask DefMask) {
LLVM_DEBUG(dbgs() << " createPHIInBlock in BB#" << JoinMBB.getNumber()
- << " OrigVReg=" << OrigVReg << " NewVReg=" << NewVReg << "\n");
+ << " OrigVReg=" << OrigVReg << " NewVReg=" << NewVReg
+ << " DefMask=" << PrintLaneMask(DefMask) << "\n");
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const LaneBitmask FullMask = MF.getRegInfo().getMaxLaneMaskForVReg(OrigVReg);
- // Derive DefMask from NewVReg's register class (matches reload size)
- const LaneBitmask ReloadMask = MF.getRegInfo().getMaxLaneMaskForVReg(NewVReg);
- const bool IsPartialReload = (FullMask & ~ReloadMask).any();
+ // Check if this is a partial lane redefinition
+ const bool IsPartialReload = (DefMask != FullMask);
// Collect PHI operands for the specific reload lanes
SmallVector<MachineOperand> PHIOperands;
LLVM_DEBUG(dbgs() << " Creating PHI for " << (IsPartialReload ? "partial reload" : "full reload")
- << " ReloadMask=" << PrintLaneMask(ReloadMask) << "\n");
+ << " DefMask=" << PrintLaneMask(DefMask) << "\n");
// Get the definition block of NewVReg for dominance checks
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -651,8 +660,9 @@ Register MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
<< " contributes OrigVReg (original path)\n");
if (IsPartialReload) {
- // Partial case: z = PHI(y, BB1, x.sub0, BB0)
- unsigned SubIdx = getSubRegIndexForLaneMask(ReloadMask, &TRI);
+ // Partial case: z = PHI(y, BB1, x.sub2_3, BB0)
+ // Use DefMask to find which subreg of OrigVReg was redefined
+ unsigned SubIdx = getSubRegIndexForLaneMask(DefMask, &TRI);
PHIOperands.push_back(MachineOperand::CreateReg(OrigVReg, /*isDef*/ false,
/*isImp*/ false, /*isKill*/ false,
/*isDead*/ false, /*isUndef*/ false,
@@ -681,6 +691,7 @@ Register MachineLaneSSAUpdater::createPHIInBlock(MachineBasicBlock &JoinMBB,
LLVM_DEBUG(dbgs() << " Created lane-specific PHI: ");
LLVM_DEBUG(PHI->print(dbgs()));
+
return PHIVReg;
}
@@ -790,13 +801,49 @@ void MachineLaneSSAUpdater::rewriteDominatedUses(Register OrigVReg,
updateDeadFlags(RSReg);
} else {
- // Case 3: Subset - use needs fewer lanes, keep subregister index
- LLVM_DEBUG(dbgs() << " Subset case -> keeping subregister\n");
- unsigned SubReg = MO.getSubReg();
- assert(SubReg && "Subset case should have subregister");
+ // Case 3: Subset - use needs fewer lanes than NewSSA provides
+ // Need to remap subregister index from OrigVReg's register class to NewSSA's register class
+ //
+ // Example: OrigVReg is vreg_128, we redefine sub2_3 (64-bit), use accesses sub3 (32-bit)
+ // MaskToRewrite = 0xF0 // sub2_3: lanes 4-7 in vreg_128 space
+ // OpMask = 0xC0 // sub3: lanes 6-7 in vreg_128 space
+ // NewSSA is vreg_64, has lanes 0-3 (but represents lanes 4-7 of OrigVReg)
+ //
+ // Algorithm: Shift OpMask down by the bit position of MaskToRewrite's LSB to map
+ // from OrigVReg's lane space into NewSSA's lane space, then find the subreg index.
+ //
+ // Why this works:
+ // 1. MaskToRewrite is contiguous (comes from subreg definition)
+ // 2. OpMask ⊆ MaskToRewrite (we're in subset case by construction)
+ // 3. Lane masks use bit positions that correspond to actual lane indices
+ // 4. Subreg boundaries are power-of-2 aligned in register class design
+ //
+ // Calculation:
+ // Shift = countTrailingZeros(MaskToRewrite) = 4 // How far "up" MaskToRewrite is
+ // NewMask = OpMask >> 4 = 0xC0 >> 4 = 0xC // Map to NewSSA's lane space
+ // 0xC corresponds to sub1 in vreg_64 ✓
+ LLVM_DEBUG(dbgs() << " Subset case -> remapping subregister index\n");
+
+ // Find the bit offset of MaskToRewrite (position of its lowest set bit)
+ unsigned ShiftAmt = llvm::countr_zero(MaskToRewrite.getAsInteger());
+ assert(ShiftAmt < 64 && "MaskToRewrite should have at least one bit set");
+
+ // Shift OpMask down into NewSSA's lane space
+ LaneBitmask NewMask = LaneBitmask(OpMask.getAsInteger() >> ShiftAmt);
+
+ // Find the subregister index for NewMask in NewSSA's register class
+ unsigned NewSubReg = getSubRegIndexForLaneMask(NewMask, &TRI);
+ assert(NewSubReg && "Should find subreg index for remapped lanes");
+
+ LLVM_DEBUG(dbgs() << " Remapping subreg:\n"
+ << " OrigVReg lanes: OpMask=" << PrintLaneMask(OpMask)
+ << " MaskToRewrite=" << PrintLaneMask(MaskToRewrite) << "\n"
+ << " Shift amount: " << ShiftAmt << "\n"
+ << " NewSSA lanes: NewMask=" << PrintLaneMask(NewMask)
+ << " -> SubReg=" << TRI.getSubRegIndexName(NewSubReg) << "\n");
MO.setReg(NewSSA);
- // Keep the existing subregister index
+ MO.setSubReg(NewSubReg);
// Extend NewSSA's live interval to cover this use
SlotIndex UseIdx = LIS.getInstructionIndex(*UseMI).getRegSlot();
@@ -861,6 +908,58 @@ LaneBitmask MachineLaneSSAUpdater::operandLaneMask(const MachineOperand &MO) {
return MRI.getMaxLaneMaskForVReg(MO.getReg());
}
+/// Decompose a (possibly non-contiguous) lane mask into a list of
+/// subregister indices whose lane masks together cover every lane in \p Mask.
+/// Mirrors getCoveringSubRegsForLaneMask in AMDGPUSSARAUtils.h (PR #156049).
+///
+/// Wider subregisters are preferred: candidates are ordered by descending
+/// lane count before the greedy selection, so the result uses the fewest,
+/// largest subregisters available in \p RC.
+///
+/// Example: for vreg_128 with Mask = 0x0F | 0xF0 lanes of sub0 and sub2
+/// (sub1 skipped), this returns [sub0_idx, sub2_idx] rather than four
+/// 16-bit pieces.
+static SmallVector<unsigned, 4> getCoveringSubRegsForLaneMask(
+ LaneBitmask Mask, const TargetRegisterInfo *TRI,
+ const TargetRegisterClass *RC) {
+ if (Mask.none())
+ return {};
+
+ // Gather every subregister index of RC whose lanes intersect Mask.
+ SmallVector<unsigned, 4> Overlapping;
+ for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx != E; ++Idx) {
+ // Skip indices that are not valid subregisters of this register class.
+ if (!TRI->getSubRegisterClass(RC, Idx))
+ continue;
+
+ if ((TRI->getSubRegIndexLaneMask(Idx) & Mask).any())
+ Overlapping.push_back(Idx);
+ }
+
+ // Widest first; a stable sort keeps index order among equal-width subregs.
+ llvm::stable_sort(Overlapping, [&](unsigned LHS, unsigned RHS) {
+ return TRI->getSubRegIndexLaneMask(LHS).getNumLanes() >
+ TRI->getSubRegIndexLaneMask(RHS).getNumLanes();
+ });
+
+ // Greedily take each candidate that is still fully contained in the
+ // not-yet-covered lanes, clearing those lanes as we go.
+ SmallVector<unsigned, 4> Cover;
+ for (unsigned Idx : Overlapping) {
+ LaneBitmask IdxMask = TRI->getSubRegIndexLaneMask(Idx);
+ if ((Mask & IdxMask) != IdxMask)
+ continue;
+
+ Cover.push_back(Idx);
+ Mask &= ~IdxMask;
+ if (Mask.none())
+ break; // Every requested lane is covered.
+ }
+
+ return Cover;
+}
+
/// Build a REG_SEQUENCE to materialize a super-reg/mixed-lane use.
/// Inserts at the PHI predecessor terminator (for PHI uses) or right before
/// UseMI otherwise. Returns the new full-width vreg, the RS index via OutIdx,
@@ -917,12 +1016,42 @@ Register MachineLaneSSAUpdater::buildRSForSuperUse(MachineInstr *UseMI, MachineO
}
// Add source for lanes from OldVR (unchanged lanes)
+ // Handle both contiguous and non-contiguous lane masks
+ // Non-contiguous example: Redefining only sub2 of vreg_128 leaves LanesFromOld = sub0+sub1+sub3
+ // Reference: getCoveringSubRegsForLaneMask from AMDGPUSSARAUtils.h (PR #156049)
+ // See: https://github.com/llvm/llvm-project/pull/156049/files#diff-b52a7e2e5b6c174847c74c25b3b579f8cfbac5d53c3364b9b69c52de71532aec
if (LanesFromOld.any()) {
unsigned SubIdx = getSubRegIndexForLaneMask(LanesFromOld, &TRI);
- assert(SubIdx && "Failed to find subregister index for LanesFromOld");
- RS.addReg(OldVR, 0, SubIdx).addImm(SubIdx); // OldVR.subIdx
- AddedSubIdxs.insert(SubIdx);
- LanesToExtend.push_back(LanesFromOld);
+
+ if (SubIdx) {
+ // Contiguous case: single subregister covers all lanes
+ RS.addReg(OldVR, 0, SubIdx).addImm(SubIdx); // OldVR.subIdx
+ AddedSubIdxs.insert(SubIdx);
+ LanesToExtend.push_back(LanesFromOld);
+ } else {
+ // Non-contiguous case: decompose into multiple subregisters
+ const TargetRegisterClass *OldRC = MRI.getRegClass(OldVR);
+ SmallVector<unsigned, 4> CoveringSubRegs =
+ getCoveringSubRegsForLaneMask(LanesFromOld, &TRI, OldRC);
+
+ assert(!CoveringSubRegs.empty() &&
+ "Failed to decompose non-contiguous lane mask into covering subregs");
+
+ LLVM_DEBUG(dbgs() << " Non-contiguous LanesFromOld=" << PrintLaneMask(LanesFromOld)
+ << " decomposed into " << CoveringSubRegs.size() << " subregs\n");
+
+ // Add each covering subregister as a source to the REG_SEQUENCE
+ for (unsigned CoverSubIdx : CoveringSubRegs) {
+ LaneBitmask CoverMask = TRI.getSubRegIndexLaneMask(CoverSubIdx);
+ RS.addReg(OldVR, 0, CoverSubIdx).addImm(CoverSubIdx); // OldVR.CoverSubIdx
+ AddedSubIdxs.insert(CoverSubIdx);
+ LanesToExtend.push_back(CoverMask);
+
+ LLVM_DEBUG(dbgs() << " Added source: OldVR."
+ << TRI.getSubRegIndexName(CoverSubIdx)
+ << " covering " << PrintLaneMask(CoverMask) << "\n");
+ }
+ }
}
assert(!AddedSubIdxs.empty() && "REG_SEQUENCE must have at least one source");
diff --git a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
index 61cbd8cc60c11..ffb49b0b6bb9e 100644
--- a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
+++ b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
@@ -181,6 +181,17 @@ body: |
// Test 1: Insert new definition and verify SSA repair with PHI insertion
//===----------------------------------------------------------------------===//
+// Test basic PHI insertion and use rewriting in a diamond CFG
+//
+// CFG Structure:
+// BB0 (entry)
+// |
+// BB1 (%1 = orig def)
+// / \
+// BB2 BB3 (INSERT: %1 = new_def)
+// \ /
+// BB4 (use %1) → PHI expected
+//
TEST(MachineLaneSSAUpdaterTest, NewDefInsertsPhiAndRewritesUses) {
liveIntervalsTest(R"MIR(
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -299,7 +310,7 @@ TEST(MachineLaneSSAUpdaterTest, NewDefInsertsPhiAndRewritesUses) {
//===----------------------------------------------------------------------===//
// Test 2: Multiple PHI insertions in nested control flow
//
-// CFG structure (from user's diagram):
+// CFG structure:
// bb.0
// |
// bb.1 (%1 = original def)
@@ -494,23 +505,32 @@ TEST(MachineLaneSSAUpdaterTest, MultiplePhiInsertion) {
// This tests the "LaneAware" part of MachineLaneSSAUpdater.
//
// Scenario:
-// - Start with a 64-bit register %1 (has sub0 and sub1 lanes)
+// - Start with a 64-bit register %3 (has sub0 and sub1 lanes)
// - Insert a new definition that only updates sub0 (lower 32 bits)
// - The SSA updater should:
// 1. Track that only sub0 lane is modified (not sub1)
// 2. Create PHI that merges only the sub0 lane
// 3. Preserve the original sub1 lane
+// 4. Generate REG_SEQUENCE to compose full register from PHI+unchanged lanes
//
-// CFG:
-// bb.0
-// |
-// bb.1 (%1 = 64-bit def, both lanes)
-// / \
-// bb.2 bb.3 (new def updates only %X.sub0)
-// \ /
-// bb.4 (needs PHI for sub0 lane only)
-// |
-// bb.5 (use both lanes)
+// CFG Structure:
+// BB0 (entry)
+// |
+// BB1 (%3:vreg_64 = REG_SEQUENCE of %1:sub0, %2:sub1)
+// / \
+// BB2 BB3 (INSERT: %3.sub0 = new_def)
+// | |
+// use (no use)
+// sub0
+// \ /
+// BB4 (use sub0 + sub1) → PHI for sub0 lane only
+// |
+// BB5 (use full %3) → REG_SEQUENCE to compose full reg from PHI result + unchanged sub1
+//
+// Expected behavior:
+// - PHI in BB4 merges only sub0 lane (changed)
+// - sub1 lane flows unchanged through the diamond
+// - REG_SEQUENCE in BB5 composes full 64-bit from (PHI_sub0, original_sub1)
//===----------------------------------------------------------------------===//
TEST(MachineLaneSSAUpdaterTest, SubregisterLaneTracking) {
@@ -840,11 +860,9 @@ TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
// 3. Check the existing PHI in bb.7
bool FoundPHI = false;
Register PHIReg;
- MachineInstr *PHI = nullptr;
for (MachineInstr &MI : *BB7) {
if (MI.isPHI()) {
FoundPHI = true;
- PHI = &MI;
PHIReg = MI.getOperand(0).getReg();
llvm::errs() << "PHI in bb.7 after SSA repair: ";
MI.print(llvm::errs());
@@ -908,6 +926,20 @@ TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
// for the back edge operand since NewDefBB (bb.2) dominates the loop latch (bb.2).
//===----------------------------------------------------------------------===//
+// Test loop with new definition in loop body requiring PHI in loop header
+//
+// CFG Structure:
+// BB0 (entry, %1 = orig def)
+// |
+// +-> BB1 (loop header)
+// | / \
+// | / \
+// BB2 BB3 (exit, use %1)
+// |
+// (INSERT: %1 = new_def)
+// |
+// +-(backedge) -> PHI needed in BB1 to merge initial value and loop value
+//
TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
liveIntervalsTest(R"MIR(
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -1052,34 +1084,39 @@ TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
// 5. PHI2 at loop header (merges entry value and PHI1 result from back edge)
// 6. Use after diamond (in latch) should use PHI1 result
//
-// CFG:
-// bb.0 (entry: X=1, i=0)
-// |
-// v
-// bb.1 (loop header)
-// PHI_i = PHI(0, bb.0, i+1, bb.5) [already in input MIR]
-// PHI2 = PHI(X, bb.0, PHI1, bb.5) [created by SSA updater]
-// USE X (before redef!) [rewritten to PHI2]
-// if (i < 10)
-// / \
-// bb.2 bb.3 (NEW DEF: X=99)
-// (then) (else)
-// | |
-// \ /
-// \/
-// bb.4 (diamond join)
-// PHI1 = PHI(X, bb.2, NewReg, bb.3) [created by SSA updater]
-// |
-// v
-// bb.5 (latch)
-// USE X [rewritten to PHI1]
-// i = i + 1
-// branch to bb.1
-// |
-// bb.6 (exit)
-// USE X
+// CFG Structure:
+// BB0 (entry: X=%1, i=0)
+// |
+// +-> BB1 (loop header)
+// | | PHI_i = PHI(0, BB0; i+1, BB5) [already in input MIR]
+// | | PHI2 = PHI(X, BB0; PHI1, BB5) [created by SSA updater]
+// | | USE X (before redef!) [rewritten to use PHI2]
+// | | if (i < 10)
+// | / \
+// | BB2 BB3 (INSERT: X = 99)
+// | | |
+// | | (then: X unchanged)
+// | | (else: NEW DEF)
+// | \ /
+// | BB4 (diamond join)
+// | | PHI1 = PHI(X, BB2; NewReg, BB3) [created by SSA updater]
+// | |
+// | BB5 (loop latch)
+// | | USE X [rewritten to use PHI1]
+// | | i = i + 1
+// | | \
+// | | \
+// +---+ BB6 (exit, USE X)
+//
+// Key challenge: Use in BB1 occurs BEFORE the def in BB3 (in program order),
+// requiring PHI2 in the loop header for proper SSA form.
+//
+// Expected SSA repair:
+// - PHI1 created in BB4 (diamond join): merges unchanged X from BB2, new def from BB3
+// - PHI2 created in BB1 (loop header): merges entry X from BB0, PHI1 result from BB5
+// - Use in BB1 rewritten to PHI2
+// - Use in BB5 rewritten to PHI1
//===----------------------------------------------------------------------===//
-
TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
liveIntervalsTest(R"MIR(
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -1146,7 +1183,6 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
MachineBasicBlock *BB0 = MF.getBlockNumbered(0); // Entry
MachineBasicBlock *BB1 = MF.getBlockNumbered(1); // Loop header
- MachineBasicBlock *BB2 = MF.getBlockNumbered(2); // Then
MachineBasicBlock *BB3 = MF.getBlockNumbered(3); // Else (new def here)
MachineBasicBlock *BB4 = MF.getBlockNumbered(4); // Diamond join
MachineBasicBlock *BB5 = MF.getBlockNumbered(5); // Latch
@@ -1321,9 +1357,33 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
});
}
-// Test: Multiple subreg redefinitions in loop (X.sub0 in one branch, X.sub1 in latch)
+// Test 7: Multiple subreg redefinitions in loop (X.sub0 in one branch, X.sub1 in latch)
// This tests the most complex scenario: two separate lane redefinitions with REG_SEQUENCE
// composition at the backedge.
+//
+//
+// CFG Structure:
+// BB0 (entry, %1:vreg_64 = IMPLICIT_DEF)
+// |
+// +-> BB1 (loop header, PHI for %0)
+// | | (use %0.sub0)
+// | / \
+// | BB2 BB5
+// | | |
+// | use INSERT: %0.sub0 = new_def1
+// |sub1 use %0.sub0
+// | \ /
+// | BB3 (latch)
+// | | (INSERT: %3.sub1 = new_def2, where %3 is increment result)
+// | | (%3 = %0 << 1)
+// +---+
+// |
+// BB4 (exit)
+//
+// Key: Two separate lane redefinitions requiring separate SSA repairs:
+// 1. %0.sub0 in BB5 → PHI for sub0 in BB3
+// 2. %3.sub1 in BB3 (after increment) → PHI for sub1 in BB1
+//
TEST(MachineLaneSSAUpdaterTest, MultipleSubregRedefsInLoop) {
SmallString<2048> S;
StringRef MIRString = (Twine(R"MIR(
@@ -1386,22 +1446,21 @@ body: |
// Get basic blocks
auto BBI = MF.begin();
- MachineBasicBlock *BB0 = &*BBI++; // Entry
+ ++BBI; // Skip BB0 (Entry)
MachineBasicBlock *BB1 = &*BBI++; // Loop header
- MachineBasicBlock *BB2 = &*BBI++; // True branch (uses X.HI)
+ ++BBI; // Skip BB2 (True branch)
MachineBasicBlock *BB5 = &*BBI++; // False branch (uses X.LO, INSERT def X.LO)
MachineBasicBlock *BB3 = &*BBI++; // Latch (increment, INSERT def X.HI)
- MachineBasicBlock *BB4 = &*BBI++; // Exit
+ // Skip BB4 (Exit)
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ (void)MRI; // May be unused, suppress warning
// Find the 64-bit register and its subregister indices
Register OrigReg = Register::index2VirtReg(0); // %0 from MIR
ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
-
- const TargetRegisterClass *RC64 = MRI.getRegClass(OrigReg);
unsigned Sub0Idx = 0, Sub1Idx = 0;
// Find sub0 (low 32 bits) and sub1 (high 32 bits)
@@ -1527,6 +1586,7 @@ body: |
}
}
}
+ EXPECT_TRUE(FoundSub0PHI) << "Should have PHI for sub0 lane in BB3";
// 2. Should have REG_SEQUENCE in BB3 before backedge to compose full 64-bit
bool FoundREGSEQ = false;
@@ -1557,4 +1617,777 @@ body: |
});
}
+// Test 8: Nested loops with SSA repair across multiple loop levels
+// This tests SSA repair with a new definition in an inner loop body that propagates
+// to both the inner loop header and outer loop header PHIs.
+//
+//
+// CFG Structure:
+// BB0 (entry, %0 = 100)
+// |
+// +-> BB1 (outer loop header)
+// | | PHI for %1 (outer induction var)
+// | |
+// | +->BB2 (inner loop header)
+// | | | PHI for %2 (inner induction var)
+// | | |\
+// | | | \
+// | | BB3 BB4 (outer loop body)
+// | | |
+// | | INSERT: %0 = new_def
+// | | (%3 = %2 + %0)
+// | | |
+// | +--+ (inner backedge) -> PHI in BB2 for %0 expected
+// | |
+// | (%4 = %1 + %0, use %0)
+// +----+ (outer backedge)
+// |
+// BB5 (exit)
+//
+// Key: New def in inner loop body propagates to:
+// 1. Inner loop header PHI (BB2)
+// 2. Outer loop body uses (BB4)
+// 3. Outer loop header PHI (BB1)
+//
+TEST(MachineLaneSSAUpdaterTest, NestedLoopsWithSSARepair) {
+ SmallString<2048> S;
+ StringRef MIRString = (Twine(R"MIR(
+--- |
+ define amdgpu_kernel void @func() { ret void }
+...
+---
+name: func
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+ successors: %bb.1
+ %0:vgpr_32 = V_MOV_B32_e32 100, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ ; Outer loop header: %1 = PHI(initial, result_from_outer_body)
+ %1:vgpr_32 = PHI %0:vgpr_32, %bb.0, %4:vgpr_32, %bb.4
+ dead %5:vgpr_32 = V_ADD_U32_e32 %1:vgpr_32, %1:vgpr_32, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3, %bb.4
+ ; Inner loop header: %2 = PHI(from_outer, from_inner_body)
+ %2:vgpr_32 = PHI %1:vgpr_32, %bb.1, %3:vgpr_32, %bb.3
+ dead %6:vgpr_32 = V_MOV_B32_e32 %2:vgpr_32, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 5
+ S_CMP_LT_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+ S_BRANCH %bb.4
+
+ bb.3:
+ successors: %bb.2
+ ; Inner loop body - accumulate value, then we'll insert new def for %0
+ %3:vgpr_32 = V_ADD_U32_e32 %2:vgpr_32, %0:vgpr_32, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.4:
+ successors: %bb.1, %bb.5
+ ; Outer loop body after inner loop exit
+ ; Increment outer induction variable %1 and use %0 (which we'll redefine)
+ %4:vgpr_32 = V_ADD_U32_e32 %1:vgpr_32, %0:vgpr_32, implicit $exec
+ dead %7:vgpr_32 = V_MOV_B32_e32 %0:vgpr_32, implicit $exec
+ $sgpr2 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 10
+ S_CMP_LT_U32 $sgpr2, $sgpr3, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.5
+
+ bb.5:
+ ; Exit
+ S_ENDPGM 0
+...
+)MIR")).toNullTerminatedStringRef(S);
+
+ doTest<LiveIntervalsWrapperPass>(MIRString,
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ llvm::errs() << "\n=== NestedLoopsWithSSARepair Test ===\n";
+
+ // Get basic blocks
+ auto BBI = MF.begin();
+ MachineBasicBlock *BB0 = &*BBI++; // Entry
+ MachineBasicBlock *BB1 = &*BBI++; // Outer loop header
+ MachineBasicBlock *BB2 = &*BBI++; // Inner loop header
+ MachineBasicBlock *BB3 = &*BBI++; // Inner loop body (INSERT HERE)
+ MachineBasicBlock *BB4 = &*BBI++; // Outer loop body (after inner)
+ // BB5 = Exit (not needed)
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Get the register that will be redefined (%0 is the initial value)
+ Register OrigReg = Register::index2VirtReg(0);
+ ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
+
+ llvm::errs() << "Original register: %" << OrigReg.virtRegIndex() << "\n";
+
+ // Get V_MOV opcode and EXEC register
+ MachineInstr *MovInst = &*BB0->begin();
+ unsigned MovOpcode = MovInst->getOpcode();
+ Register ExecReg = MovInst->getOperand(2).getReg();
+
+ // Print initial state
+ llvm::errs() << "\nInitial BB2 (inner loop header):\n";
+ for (MachineInstr &MI : *BB2) {
+ MI.print(llvm::errs());
+ }
+
+ llvm::errs() << "\nInitial BB1 (outer loop header):\n";
+ for (MachineInstr &MI : *BB1) {
+ MI.print(llvm::errs());
+ }
+
+ // Insert new definition in BB3 (inner loop body)
+ // Find insertion point before the branch
+ MachineInstr *InsertPt = nullptr;
+ for (MachineInstr &MI : *BB3) {
+ if (MI.isBranch()) {
+ InsertPt = &MI;
+ break;
+ }
+ }
+ ASSERT_NE(InsertPt, nullptr) << "Should find branch in BB3";
+
+ // Insert: X = 999 (violates SSA)
+ MachineInstr *NewDefMI = BuildMI(*BB3, InsertPt, DebugLoc(),
+ TII->get(MovOpcode), OrigReg)
+ .addImm(999)
+ .addReg(ExecReg, RegState::Implicit);
+
+ llvm::errs() << "\nInserted new def in BB3 (inner loop body): ";
+ NewDefMI->print(llvm::errs());
+
+ // Create SSA updater and repair
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
+
+ llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+
+ // === Verification ===
+ llvm::errs() << "\n=== Verification ===\n";
+
+ llvm::errs() << "\nFinal BB2 (inner loop header):\n";
+ for (MachineInstr &MI : *BB2) {
+ MI.print(llvm::errs());
+ }
+
+ llvm::errs() << "\nFinal BB1 (outer loop header):\n";
+ for (MachineInstr &MI : *BB1) {
+ MI.print(llvm::errs());
+ }
+
+ llvm::errs() << "\nFinal BB4 (outer loop body after inner):\n";
+ for (MachineInstr &MI : *BB4) {
+ MI.print(llvm::errs());
+ }
+
+ // 1. Inner loop header (BB2) should have NEW PHI created by SSA repair
+ bool FoundSSARepairPHI = false;
+ Register SSARepairPHIReg;
+ for (MachineInstr &MI : *BB2) {
+ if (MI.isPHI()) {
+ // Look for a PHI that has NewReg as one of its incoming values
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ Register IncomingReg = MI.getOperand(i).getReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+
+ if (IncomingMBB == BB3 && IncomingReg == NewReg) {
+ FoundSSARepairPHI = true;
+ SSARepairPHIReg = MI.getOperand(0).getReg();
+ llvm::errs() << "Found SSA repair PHI in inner loop header: ";
+ MI.print(llvm::errs());
+
+ // Should have incoming from BB1 and BB3
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "SSA repair PHI should have 2 incoming";
+ break;
+ }
+ }
+ if (FoundSSARepairPHI)
+ break;
+ }
+ }
+ EXPECT_TRUE(FoundSSARepairPHI) << "Should find SSA repair PHI in BB2 (inner loop header)";
+
+ // 2. Outer loop header (BB1) may have PHI updated if needed
+ bool FoundOuterPHI = false;
+ for (MachineInstr &MI : *BB1) {
+ if (MI.isPHI() && MI.getOperand(0).getReg() == Register::index2VirtReg(1)) {
+ FoundOuterPHI = true;
+ llvm::errs() << "Found outer loop PHI: ";
+ MI.print(llvm::errs());
+ }
+ }
+ EXPECT_TRUE(FoundOuterPHI) << "Should find outer loop PHI in BB1";
+
+ // 3. Use in BB4 should be updated
+ bool FoundUseInBB4 = false;
+ for (MachineInstr &MI : *BB4) {
+ if (!MI.isPHI() && MI.getNumOperands() > 1) {
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).isUse()) {
+ Register UseReg = MI.getOperand(i).getReg();
+ if (UseReg.isVirtual()) {
+ FoundUseInBB4 = true;
+ llvm::errs() << "Found use in BB4: %" << UseReg.virtRegIndex() << " in ";
+ MI.print(llvm::errs());
+ }
+ }
+ }
+ }
+ }
+ EXPECT_TRUE(FoundUseInBB4) << "Should find uses in outer loop body (BB4)";
+
+ // 4. Verify LiveIntervals
+ EXPECT_TRUE(LIS.hasInterval(NewReg));
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Test 9: 128-bit register with 64-bit subreg redef and multiple lane uses
+//
+// This comprehensive test covers:
+// 1. Large register (128-bit) with multiple subregisters (sub0, sub1, sub2, sub3)
+// 2. Partial redefinition (64-bit sub2_3 covering two lanes: sub2+sub3)
+// 3. Uses of changed lanes (sub2, sub3) in different paths
+// 4. Uses of unchanged lanes (sub0, sub1) in different paths
+// 5. Diamond CFG with redef in one branch
+// 6. Second diamond to test propagation of PHI result
+//
+// CFG Structure:
+// BB0 (entry)
+// |
+// BB1 (%0:vreg_128 = initial 128-bit value)
+// |
+// BB2 (diamond1 split)
+// / \
+// BB3 BB4 (INSERT: %0.sub2_3 = new_def)
+// | |
+// use use
+// sub0 sub3 (changed)
+// \ /
+// BB5 (join) -> PHI for sub2_3 lanes (sub2+sub3 changed, sub0+sub1 unchanged)
+// |
+// use sub1 (unchanged, flows from BB1)
+// |
+// BB6 (diamond2 split)
+// / \
+// BB7 BB8
+// | |
+// use (no use)
+// sub2
+// \ /
+// BB9 (join, no PHI - BB5's PHI dominates)
+// |
+// BB10 (use sub0, exit)
+//
+// Expected behavior:
+// - PHI in BB5 merges sub2_3 lanes ONLY (sub2+sub3 changed)
+// - sub0+sub1 lanes flow unchanged from BB1 through entire CFG
+// - Uses in BB5, BB7, BB10 use PHI result or unchanged lanes
+// - No PHI in BB9 (BB5 dominates, PHI result flows through)
+//
+// This test validates:
+// ✓ Partial redefinition (64-bit of 128-bit)
+// ✓ Multiple different subreg uses (sub0, sub1, sub2, sub3)
+// ✓ Changed vs unchanged lane tracking
+// ✓ PHI result propagation to dominated blocks
+//===----------------------------------------------------------------------===//
+TEST(MachineLaneSSAUpdaterTest, MultipleSubregUsesAcrossDiamonds) {
+ SmallString<4096> S;
+ StringRef MIRString = (Twine(R"MIR(
+--- |
+ define amdgpu_kernel void @func() { ret void }
+...
+---
+name: func
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+body: |
+ bb.0:
+ successors: %bb.1
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ ; Initialize 128-bit register %0 with IMPLICIT_DEF
+ %0:vreg_128 = IMPLICIT_DEF
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3, %bb.4
+ ; Diamond 1 split
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.4, implicit $scc
+
+ bb.3:
+ successors: %bb.5
+ ; Use sub0 (unchanged lane, low 32 bits)
+ %1:vgpr_32 = V_MOV_B32_e32 %0.sub0:vreg_128, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ successors: %bb.5
+ ; This is where we'll INSERT: %0.sub2_3 = new_def (64-bit, covers sub2+sub3)
+ ; After insertion, use sub3 (high 32 bits of sub2_3)
+ %2:vgpr_32 = V_MOV_B32_e32 %0.sub3:vreg_128, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ successors: %bb.6
+ ; Diamond 1 join - PHI expected for sub2_3 lanes
+ ; Use sub1 (unchanged lane, bits 32-63)
+ %3:vgpr_32 = V_MOV_B32_e32 %0.sub1:vreg_128, implicit $exec
+ S_BRANCH %bb.6
+
+ bb.6:
+ successors: %bb.7, %bb.8
+ ; Diamond 2 split
+ $sgpr2 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr2, $sgpr3, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.8, implicit $scc
+
+ bb.7:
+ successors: %bb.9
+ ; Use sub2 (changed lane, bits 64-95)
+ dead %4:vgpr_32 = V_MOV_B32_e32 %0.sub2:vreg_128, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.8:
+ successors: %bb.9
+ ; No use - sparse use pattern
+ S_NOP 0
+
+ bb.9:
+ successors: %bb.10
+ ; Diamond 2 join - no PHI needed (BB5 dominates)
+ S_NOP 0
+
+ bb.10:
+ ; Exit - use sub0 again (unchanged lane)
+ dead %5:vgpr_32 = V_MOV_B32_e32 %0.sub0:vreg_128, implicit $exec
+ S_ENDPGM 0
+...
+)MIR")).toNullTerminatedStringRef(S);
+
+ doTest<LiveIntervalsWrapperPass>(MIRString,
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ llvm::errs() << "\n=== MultipleSubregUsesAcrossDiamonds Test ===\n";
+
+ // Get basic blocks
+ auto BBI = MF.begin();
+ ++BBI; // Skip BB0 (entry)
+ ++BBI; // Skip BB1 (Initial def)
+ ++BBI; // Skip BB2 (Diamond1 split)
+ MachineBasicBlock *BB3 = &*BBI++; // Diamond1 true (no redef)
+ MachineBasicBlock *BB4 = &*BBI++; // Diamond1 false (INSERT HERE)
+ MachineBasicBlock *BB5 = &*BBI++; // Diamond1 join
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ (void)MRI; // May be unused, suppress warning
+
+ // Find the 128-bit register %0
+ Register OrigReg = Register::index2VirtReg(0);
+ ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
+
+ llvm::errs() << "Using 128-bit register: %" << OrigReg.virtRegIndex() << "\n";
+
+ // Find sub2_3 subregister index (64-bit covering bits 64-127)
+ unsigned Sub2_3Idx = 0;
+ for (unsigned Idx = 1; Idx < TRI->getNumSubRegIndices(); ++Idx) {
+ unsigned SubRegSize = TRI->getSubRegIdxSize(Idx);
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(Idx);
+
+ // Looking for 64-bit subreg covering upper half (lanes for sub2+sub3)
+ // sub2_3 should have mask 0xF0 (lanes for bits 64-127)
+ if (SubRegSize == 64 && (Mask.getAsInteger() & 0xF0) == 0xF0) {
+ Sub2_3Idx = Idx;
+ llvm::errs() << "Found sub2_3 index: " << Idx
+ << " (size=" << SubRegSize
+ << ", mask=0x" << llvm::format("%X", Mask.getAsInteger()) << ")\n";
+ break;
+ }
+ }
+
+ ASSERT_NE(Sub2_3Idx, 0u) << "Should find sub2_3 subregister index";
+
+ // Insert new definition in BB4: %0.sub2_3 = IMPLICIT_DEF
+ // Find insertion point (before the use of sub3)
+ MachineInstr *UseOfSub3 = nullptr;
+
+ for (MachineInstr &MI : *BB4) {
+ if (MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == OrigReg) {
+ UseOfSub3 = &MI;
+ break;
+ }
+ }
+ ASSERT_NE(UseOfSub3, nullptr) << "Should find use of sub3 in BB4";
+
+ // Create new def: %0.sub2_3 = IMPLICIT_DEF
+ // We use IMPLICIT_DEF because it works for any register size and the SSA updater
+ // doesn't care about the specific instruction semantics - we're just testing SSA repair
+ MachineInstrBuilder MIB = BuildMI(*BB4, UseOfSub3, DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(OrigReg, RegState::Define, Sub2_3Idx);
+
+ MachineInstr *NewDefMI = MIB.getInstr();
+ llvm::errs() << "Inserted new def in BB4: ";
+ NewDefMI->print(llvm::errs());
+
+ // Index the new instruction
+ LIS.InsertMachineInstrInMaps(*NewDefMI);
+
+ // Set MachineFunction properties to allow PHI insertion
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // Create SSA updater and repair
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
+
+ llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+
+ // Print final state of key blocks
+ llvm::errs() << "\nFinal BB5 (diamond1 join):\n";
+ for (MachineInstr &MI : *BB5) {
+ MI.print(llvm::errs());
+ }
+
+ // Verify SSA repair results
+
+ // 1. Should have PHI in BB5 for sub2+sub3 lanes
+ bool FoundPHI = false;
+ for (MachineInstr &MI : *BB5) {
+ if (MI.isPHI()) {
+ Register PHIResult = MI.getOperand(0).getReg();
+ if (PHIResult.isVirtual()) {
+ llvm::errs() << "Found PHI in BB5: ";
+ MI.print(llvm::errs());
+
+ // Check that it has 2 incoming values
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "PHI should have 2 incoming values";
+
+ // Check that one incoming is the new register from BB4
+ // and the other incoming from BB3 uses %0.sub2_3
+ bool HasNewRegFromBB4 = false;
+ bool HasCorrectSubregFromBB3 = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ Register IncomingReg = MI.getOperand(i).getReg();
+ unsigned IncomingSubReg = MI.getOperand(i).getSubReg();
+ MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB();
+
+ if (IncomingMBB == BB4) {
+ HasNewRegFromBB4 = (IncomingReg == NewReg);
+ llvm::errs() << " Incoming from BB4: %" << IncomingReg.virtRegIndex() << "\n";
+ } else if (IncomingMBB == BB3) {
+ // Should be %0.sub2_3 (the lanes we redefined)
+ llvm::errs() << " Incoming from BB3: %" << IncomingReg.virtRegIndex();
+ if (IncomingSubReg) {
+ llvm::errs() << "." << TRI->getSubRegIndexName(IncomingSubReg);
+ }
+ llvm::errs() << "\n";
+
+ // Verify it's using sub2_3
+ if (IncomingReg == OrigReg && IncomingSubReg == Sub2_3Idx) {
+ HasCorrectSubregFromBB3 = true;
+ }
+ }
+ }
+ EXPECT_TRUE(HasNewRegFromBB4) << "PHI should use NewReg from BB4";
+ EXPECT_TRUE(HasCorrectSubregFromBB3) << "PHI should use %0.sub2_3 from BB3";
+ FoundPHI = true;
+ }
+ }
+ }
+ EXPECT_TRUE(FoundPHI) << "Should find PHI in BB5 for sub2_3 lanes";
+
+ // 2. Verify LiveIntervals
+ EXPECT_TRUE(LIS.hasInterval(NewReg));
+ EXPECT_TRUE(LIS.hasInterval(OrigReg));
+
+ // 3. Verify LiveInterval for OrigReg has subranges for changed lanes
+ LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ EXPECT_TRUE(OrigLI.hasSubRanges()) << "OrigReg should have subranges after partial redef";
+
+ // Debug output if verification fails
+ if (!MF.verify(nullptr, nullptr, nullptr, false)) {
+ llvm::errs() << "MachineFunction verification failed:\n";
+ MF.print(llvm::errs());
+ LIS.print(llvm::errs());
+ }
+ });
+}
+
+// Test 10: Non-contiguous lane mask - redefine sub1 of 128-bit, use full register
+// This specifically tests the multi-source REG_SEQUENCE code path for non-contiguous lanes
+//
+// CFG Structure:
+// BB0 (entry)
+// |
+// v
+// BB1 (%0:vreg_128 = IMPLICIT_DEF)
+// |
+// v
+// BB2 (diamond split)
+// / \
+// / \
+// v v
+// BB3 BB4 (%0.sub1 = IMPLICIT_DEF - redefine middle lane!)
+// \ /
+// \ /
+// v
+// BB5 (diamond join - USE %0 as full register)
+// |
+// v
+// BB6 (exit)
+//
+// Key Property: Redefining sub1 leaves LanesFromOld = sub0 + sub2 + sub3 (non-contiguous!)
+// This requires getCoveringSubRegsForLaneMask to decompose into multiple subregs
+// Expected REG_SEQUENCE: %RS = REG_SEQUENCE %6, sub1, %0.sub0, sub0, %0.sub2_3, sub2_3
+//
+TEST(MachineLaneSSAUpdaterTest, NonContiguousLaneMaskREGSEQUENCE) {
+ SmallString<4096> S;
+ StringRef MIRString = (Twine(R"MIR(
+--- |
+ define amdgpu_kernel void @func() { ret void }
+...
+---
+name: func
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vreg_128 }
+body: |
+ bb.0:
+ successors: %bb.1
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ %0:vreg_128 = IMPLICIT_DEF
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 1
+ S_CMP_LG_U32 $sgpr0, $sgpr1, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.3, implicit $scc
+
+ bb.2:
+ successors: %bb.4
+ ; Left path - no redefinition
+ S_NOP 0
+ S_BRANCH %bb.4
+
+ bb.3:
+ successors: %bb.4
+ ; Right path - THIS IS WHERE WE'LL INSERT: %0.sub1 = IMPLICIT_DEF
+ S_NOP 0
+ S_BRANCH %bb.4
+
+ bb.4:
+ ; Diamond join - use FULL register (this will need REG_SEQUENCE!)
+ ; Using full %0 (not a subreg) forces composition of non-contiguous lanes
+ dead %1:vreg_128 = COPY %0:vreg_128
+ S_ENDPGM 0
+...
+)MIR")).toNullTerminatedStringRef(S);
+
+ doTest<LiveIntervalsWrapperPass>(MIRString,
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ llvm::errs() << "\n=== NonContiguousLaneMaskREGSEQUENCE Test ===\n";
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ (void)MRI; // May be unused, suppress warning
+
+ // Find blocks
+ // bb.0 = entry
+ // bb.1 = IMPLICIT_DEF + diamond split
+ // bb.2 = left path (no redef)
+ // bb.3 = right path (INSERT sub1 def here)
+ // bb.4 = diamond join (use full register)
+ MachineBasicBlock *BB3 = MF.getBlockNumbered(3); // Right path - where we insert
+ MachineBasicBlock *BB4 = MF.getBlockNumbered(4); // Join - where we need REG_SEQUENCE
+
+ // Find %0 (the vreg_128)
+ Register OrigReg = Register::index2VirtReg(0);
+ ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
+ llvm::errs() << "Using 128-bit register: %" << OrigReg.virtRegIndex() << "\n";
+
+ // Find sub1 subregister index
+ unsigned Sub1Idx = 0;
+ for (unsigned Idx = 1; Idx < TRI->getNumSubRegIndices(); ++Idx) {
+ StringRef Name = TRI->getSubRegIndexName(Idx);
+ if (Name == "sub1") {
+ Sub1Idx = Idx;
+ break;
+ }
+ }
+
+ ASSERT_NE(Sub1Idx, 0u) << "Should find sub1 subregister index";
+
+ // Insert new definition in BB3 (right path): %0.sub1 = IMPLICIT_DEF
+ MachineInstrBuilder MIB = BuildMI(*BB3, BB3->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF))
+ .addDef(OrigReg, RegState::Define, Sub1Idx);
+
+ MachineInstr *NewDefMI = MIB.getInstr();
+ llvm::errs() << "Inserted new def in BB3: ";
+ NewDefMI->print(llvm::errs());
+
+ // Index the new instruction
+ LIS.InsertMachineInstrInMaps(*NewDefMI);
+
+ // Set MachineFunction properties to allow PHI insertion
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // Create SSA updater and repair
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
+
+ llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+
+ // Print final state
+ llvm::errs() << "\nFinal BB4 (diamond join):\n";
+ for (MachineInstr &MI : *BB4) {
+ MI.print(llvm::errs());
+ }
+
+ // Verify SSA repair results
+
+ // 1. Should have PHI in BB4 for sub1 lane
+ bool FoundPHI = false;
+ Register PHIReg;
+ for (MachineInstr &MI : *BB4) {
+ if (MI.isPHI()) {
+ PHIReg = MI.getOperand(0).getReg();
+ if (PHIReg.isVirtual()) {
+ llvm::errs() << "Found PHI in BB4: ";
+ MI.print(llvm::errs());
+ FoundPHI = true;
+
+ // Check that it has 2 incoming values
+ unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
+ EXPECT_EQ(NumIncoming, 2u) << "PHI should have 2 incoming values";
+
+ // One incoming should be the new register (vgpr_32 from BB3)
+ bool HasNewRegFromBB3 = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).getReg() == NewReg) {
+ EXPECT_EQ(MI.getOperand(i + 1).getMBB(), BB3) << "NewReg should come from BB3";
+ HasNewRegFromBB3 = true;
+ }
+ }
+ EXPECT_TRUE(HasNewRegFromBB3) << "PHI should have NewReg from BB3";
+
+ break;
+ }
+ }
+ }
+
+ EXPECT_TRUE(FoundPHI) << "Should create PHI in BB4 for sub1 lane";
+
+ // 2. Most importantly: Should have REG_SEQUENCE with MULTIPLE sources for non-contiguous lanes
+ // After PHI for sub1, we need to compose full register:
+ // LanesFromOld = sub0 + sub2 + sub3 (non-contiguous!)
+ // This requires multiple REG_SEQUENCE operands
+ bool FoundREGSEQUENCE = false;
+ unsigned NumREGSEQSources = 0;
+
+ for (MachineInstr &MI : *BB4) {
+ if (MI.getOpcode() == TargetOpcode::REG_SEQUENCE) {
+ llvm::errs() << "Found REG_SEQUENCE: ";
+ MI.print(llvm::errs());
+ FoundREGSEQUENCE = true;
+
+ // Count sources (each source is: register + subregidx, so pairs)
+ NumREGSEQSources = (MI.getNumOperands() - 1) / 2;
+ llvm::errs() << " REG_SEQUENCE has " << NumREGSEQSources << " sources\n";
+
+ // We expect at least 2 sources for non-contiguous case:
+ // 1. PHI result covering sub1
+ // 2. One or more sources from OrigReg covering sub0, sub2, sub3
+ EXPECT_GE(NumREGSEQSources, 2u)
+ << "REG_SEQUENCE should have multiple sources for non-contiguous lanes";
+
+ // Verify at least one source is the PHI result
+ bool HasPHISource = false;
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).getReg() == PHIReg) {
+ HasPHISource = true;
+ break;
+ }
+ }
+ EXPECT_TRUE(HasPHISource) << "REG_SEQUENCE should use PHI result";
+
+ break;
+ }
+ }
+
+ EXPECT_TRUE(FoundREGSEQUENCE)
+ << "Should create REG_SEQUENCE to compose full register from non-contiguous lanes";
+
+ // 3. The COPY use should now reference the REG_SEQUENCE result (not %0)
+ bool FoundRewrittenUse = false;
+ for (MachineInstr &MI : *BB4) {
+ if (MI.getOpcode() == TargetOpcode::COPY) {
+ MachineOperand &SrcOp = MI.getOperand(1);
+ if (SrcOp.isReg() && SrcOp.getReg().isVirtual() && SrcOp.getReg() != OrigReg) {
+ llvm::errs() << "Found rewritten COPY: ";
+ MI.print(llvm::errs());
+ FoundRewrittenUse = true;
+ break;
+ }
+ }
+ }
+
+ EXPECT_TRUE(FoundRewrittenUse) << "COPY should be rewritten to use REG_SEQUENCE result";
+
+ // Print summary
+ llvm::errs() << "\n=== Test Summary ===\n";
+ llvm::errs() << "✓ Redefined sub1 (middle lane) of vreg_128\n";
+ llvm::errs() << "✓ Created PHI for sub1 lane\n";
+ llvm::errs() << "✓ Created REG_SEQUENCE with " << NumREGSEQSources
+ << " sources to handle non-contiguous lanes (sub0 + sub2 + sub3)\n";
+ llvm::errs() << "✓ This test exercises getCoveringSubRegsForLaneMask!\n";
+ });
+}
+
} // anonymous namespace
>From ebcbb8c659ae7ca2746228a3ca7b91281da08eea Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Mon, 13 Oct 2025 19:31:37 +0000
Subject: [PATCH 6/7] [CodeGen] Add MachineLaneSSAUpdater tests and simplify
API
Add 11 unit tests covering SSA repair for diamonds, loops, subregisters,
and spill/reload scenarios. Fix critical bugs in subregister remapping
and LiveInterval handling.
Simplify design by removing ~250 lines of unnecessary spill-specific code
(SpillCutCollector, CutEndPoints, addDefAndRepairAfterSpill). The unified
repairSSAForNewDef() method handles all scenarios, naturally pruning
LiveIntervals through use rewriting and recomputation.
---
.../llvm/CodeGen/MachineLaneSSAUpdater.h | 89 +----
llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp | 187 ----------
llvm/unittests/CodeGen/CMakeLists.txt | 1 +
.../MachineLaneSSAUpdaterSpillReloadTest.cpp | 332 ++++++++++++++++++
4 files changed, 337 insertions(+), 272 deletions(-)
create mode 100644 llvm/unittests/CodeGen/MachineLaneSSAUpdaterSpillReloadTest.cpp
diff --git a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
index 4f5b7340b179d..764bdec65d580 100644
--- a/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineLaneSSAUpdater.h
@@ -32,88 +32,14 @@ class LiveRange;
class MachineDominatorTree;
class MachinePostDominatorTree; // optional if you choose to use it
-//===----------------------------------------------------------------------===//
-// CutEndPoints: Opaque token representing a spill-time cut of a value.
-// Constructed only by SpillCutCollector and consumed by the updater in
-// addDefAndRepairAfterSpill().
-//===----------------------------------------------------------------------===//
-class CutEndPoints {
-public:
- CutEndPoints() = delete;
-
- Register getOrigVReg() const { return OrigVReg; }
- SlotIndex getCutIdx() const { return CutIdx; }
- const SmallVector<LaneBitmask, 4> &getTouchedLaneMasks() const { return TouchedLaneMasks; }
-
- // Access to captured endpoint data for extendToIndices()
- const SmallVector<SlotIndex, 8> &getMainEndPoints() const { return MainEndPoints; }
- const DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> &getSubrangeEndPoints() const {
- return SubrangeEndPoints;
- }
-
- // Optional: debugging aids (not required for functionality).
- const SmallVector<LiveRange::Segment, 4> &getDebugSegsBefore() const { return SegsBefore; }
-
-private:
- friend class SpillCutCollector; // only the collector can create valid tokens
-
- // Private constructor used by the collector.
- CutEndPoints(Register VReg,
- SlotIndex Cut,
- SmallVector<LaneBitmask, 4> Lanes,
- SmallVector<SlotIndex, 8> MainEP,
- DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> SubEP,
- SmallVector<LiveRange::Segment, 4> Before)
- : OrigVReg(VReg), CutIdx(Cut),
- TouchedLaneMasks(std::move(Lanes)),
- MainEndPoints(std::move(MainEP)),
- SubrangeEndPoints(std::move(SubEP)),
- SegsBefore(std::move(Before)) {}
-
- Register OrigVReg;
- SlotIndex CutIdx;
- SmallVector<LaneBitmask, 4> TouchedLaneMasks; // main + touched subranges
-
- // Captured endpoint data for extendToIndices()
- SmallVector<SlotIndex, 8> MainEndPoints;
- DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> SubrangeEndPoints;
-
- // Optional diagnostics: segments before pruning (for asserts/debug dumps).
- SmallVector<LiveRange::Segment, 4> SegsBefore;
-};
-
-//===----------------------------------------------------------------------===//
-// SpillCutCollector: captures EndPoints at spill-time by calling pruneValue()
-// on the main live range and the touched subranges. The opaque CutEndPoints
-// are later consumed by the updater.
-//===----------------------------------------------------------------------===//
-class SpillCutCollector {
-public:
- explicit SpillCutCollector(LiveIntervals &LIS, MachineRegisterInfo &MRI)
- : LIS(LIS), MRI(MRI) {}
-
- // Decide a cut at CutIdx for OrigVReg (lane-aware). This should:
- // - call pruneValue() on main + subranges as needed,
- // - stash the returned endpoints needed by extendToIndices(),
- // - return an opaque token capturing OrigVReg, CutIdx, and masks.
- CutEndPoints cut(Register OrigVReg, SlotIndex CutIdx, LaneBitmask LanesToCut);
-
-private:
- LiveIntervals &LIS;
- MachineRegisterInfo &MRI;
-};
-
//===----------------------------------------------------------------------===//
// MachineLaneSSAUpdater: universal SSA repair for Machine IR (lane-aware)
//
-// Use Case 1 (Common): repairSSAForNewDef()
+// Use Case: repairSSAForNewDef()
// - Caller creates a new instruction that defines an existing vreg (violating SSA)
// - This function creates a new vreg, replaces the operand, and repairs SSA
-// - Example: User inserts "OrigVReg = ADD ..." and calls repairSSAForNewDef()
-//
-// Use Case 2 (Spill/Reload): addDefAndRepairAfterSpill()
-// - Spiller has already created both instruction and new vreg
-// - Must consume CutEndPoints from spill-time
+// - Example: Insert "OrigVReg = ADD ..." and call repairSSAForNewDef()
+// - This works for all scenarios including spill/reload
//===----------------------------------------------------------------------===//
class MachineLaneSSAUpdater {
public:
@@ -138,15 +64,8 @@ class MachineLaneSSAUpdater {
// Returns: The newly created virtual register
Register repairSSAForNewDef(MachineInstr &NewDefMI, Register OrigVReg);
- // Reload-after-spill path (requires spill-time EndPoints). Will assert
- // if the token does not match the OrigVReg or if indices are inconsistent.
- Register addDefAndRepairAfterSpill(MachineInstr &ReloadMI,
- Register OrigVReg,
- LaneBitmask DefMask,
- const CutEndPoints &EP);
-
private:
- // Common SSA repair logic used by both entry points
+ // Common SSA repair logic
void performSSARepair(Register NewVReg, Register OrigVReg,
LaneBitmask DefMask, MachineBasicBlock *DefBB);
diff --git a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
index 64c2b5d1706c5..000fbb78f742c 100644
--- a/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineLaneSSAUpdater.cpp
@@ -45,119 +45,6 @@
using namespace llvm;
-//===----------------------------------------------------------------------===//
-// SpillCutCollector Implementation
-//===----------------------------------------------------------------------===//
-
-CutEndPoints SpillCutCollector::cut(Register OrigVReg, SlotIndex CutIdx,
- LaneBitmask LanesToCut) {
- LLVM_DEBUG(dbgs() << "SpillCutCollector::cut VReg=" << OrigVReg
- << " at " << CutIdx << " lanes=" << PrintLaneMask(LanesToCut) << "\n");
-
- assert(OrigVReg.isVirtual() && "Only virtual registers can be cut for spilling");
-
- LiveInterval &LI = LIS.getInterval(OrigVReg);
- SmallVector<LaneBitmask, 4> TouchedLanes;
- SmallVector<LiveRange::Segment, 4> DebugSegsBefore;
- SmallVector<SlotIndex, 8> MainEndPoints;
- DenseMap<LaneBitmask, SmallVector<SlotIndex, 8>> SubrangeEndPoints;
-
- // Store debug information before pruning
- for (const LiveRange::Segment &S : LI.segments) {
- DebugSegsBefore.push_back(S);
- }
-
- // Use MRI to get the accurate full mask for this register class
- LaneBitmask RegClassFullMask = MRI.getMaxLaneMaskForVReg(OrigVReg);
- bool HasSubranges = !LI.subranges().empty();
- bool IsFullRegSpill = (LanesToCut == RegClassFullMask) || (!HasSubranges && MRI.shouldTrackSubRegLiveness(OrigVReg));
-
- LLVM_DEBUG(dbgs() << " HasSubranges=" << HasSubranges
- << " RegClassFullMask=" << PrintLaneMask(RegClassFullMask)
- << " shouldTrackSubRegLiveness=" << MRI.shouldTrackSubRegLiveness(OrigVReg)
- << " IsFullRegSpill=" << IsFullRegSpill << "\n");
-
- if (IsFullRegSpill) {
- // Whole-register spill: prune main range only
- if (LI.liveAt(CutIdx)) {
- TouchedLanes.push_back(LanesToCut);
- LIS.pruneValue(LI, CutIdx, &MainEndPoints);
- LLVM_DEBUG(dbgs() << " Pruned main range (whole-reg) with " << MainEndPoints.size()
- << " endpoints\n");
- }
- } else {
- // Partial-lane spill: refine-then-operate on subranges
- LLVM_DEBUG(dbgs() << " Partial-lane spill: refining subranges for "
- << PrintLaneMask(LanesToCut) << "\n");
-
- // Step 1: Collect subranges that need refinement
- SmallVector<LiveInterval::SubRange *, 4> SubrangesToRefine;
- SmallVector<LiveInterval::SubRange *, 4> PreciseMatches;
-
- for (LiveInterval::SubRange &SR : LI.subranges()) {
- LaneBitmask Overlap = SR.LaneMask & LanesToCut;
- if (Overlap.none()) {
- continue; // No intersection, skip
- }
-
- if (Overlap == SR.LaneMask) {
- // SR is completely contained in LanesToCut
- PreciseMatches.push_back(&SR);
- LLVM_DEBUG(dbgs() << " Found " << (SR.LaneMask == LanesToCut ? "precise" : "subset")
- << " match: " << PrintLaneMask(SR.LaneMask) << "\n");
- } else {
- // Partial overlap: need to refine this subrange
- SubrangesToRefine.push_back(&SR);
- LLVM_DEBUG(dbgs() << " Need to refine: " << PrintLaneMask(SR.LaneMask)
- << " (overlap=" << PrintLaneMask(Overlap) << ")\n");
- }
- }
-
- // Step 2: Refine overlapping subranges into disjoint ones
- for (LiveInterval::SubRange *SR : SubrangesToRefine) {
- LaneBitmask OrigMask = SR->LaneMask;
- LaneBitmask SpillMask = OrigMask & LanesToCut;
- LaneBitmask KeepMask = OrigMask & ~LanesToCut;
-
- LLVM_DEBUG(dbgs() << " Refining " << PrintLaneMask(OrigMask)
- << " into Spill=" << PrintLaneMask(SpillMask)
- << " Keep=" << PrintLaneMask(KeepMask) << "\n");
-
- // Create new subrange for spilled portion (SpillMask is always non-empty here)
- LiveInterval::SubRange *SpillSR = LI.createSubRange(LIS.getVNInfoAllocator(), SpillMask);
- // Copy liveness from original subrange
- SpillSR->assign(*SR, LIS.getVNInfoAllocator());
- PreciseMatches.push_back(SpillSR);
- LLVM_DEBUG(dbgs() << " Created spill subrange: " << PrintLaneMask(SpillMask) << "\n");
-
- // Update original subrange to keep-only portion (KeepMask is always non-empty here)
- SR->LaneMask = KeepMask;
- LLVM_DEBUG(dbgs() << " Updated original to keep: " << PrintLaneMask(KeepMask) << "\n");
- }
-
- // Step 3: Prune only the precise matches for LanesToCut
- for (LiveInterval::SubRange *SR : PreciseMatches) {
- if (SR->liveAt(CutIdx) && (SR->LaneMask & LanesToCut).any()) {
- TouchedLanes.push_back(SR->LaneMask);
- SmallVector<SlotIndex, 8> SubEndPoints;
- LIS.pruneValue(*SR, CutIdx, &SubEndPoints);
- SubrangeEndPoints[SR->LaneMask] = std::move(SubEndPoints);
- LLVM_DEBUG(dbgs() << " Pruned subrange " << PrintLaneMask(SR->LaneMask)
- << " with " << SubrangeEndPoints[SR->LaneMask].size() << " endpoints\n");
- }
- }
-
- // Note: Do NOT prune main range for partial spills - subranges are authoritative
- }
-
- LLVM_DEBUG(dbgs() << " Cut complete: " << TouchedLanes.size()
- << " touched lane masks\n");
-
- return CutEndPoints(OrigVReg, CutIdx, std::move(TouchedLanes),
- std::move(MainEndPoints), std::move(SubrangeEndPoints),
- std::move(DebugSegsBefore));
-}
-
//===----------------------------------------------------------------------===//
// MachineLaneSSAUpdater Implementation
//===----------------------------------------------------------------------===//
@@ -260,72 +147,6 @@ Register MachineLaneSSAUpdater::repairSSAForNewDef(MachineInstr &NewDefMI,
return NewSSAVReg;
}
-Register MachineLaneSSAUpdater::addDefAndRepairAfterSpill(MachineInstr &ReloadMI,
- Register OrigVReg,
- LaneBitmask DefMask,
- const CutEndPoints &EP) {
- LLVM_DEBUG(dbgs() << "MachineLaneSSAUpdater::addDefAndRepairAfterSpill VReg=" << OrigVReg
- << " DefMask=" << PrintLaneMask(DefMask) << "\n");
-
- // Safety checks as specified in the design
- assert(EP.getOrigVReg() == OrigVReg &&
- "CutEndPoints OrigVReg mismatch");
-
- // Validate that DefMask is a subset of the lanes that were actually spilled
- // This allows partial reloads (e.g., reload 32-bit subreg from 64-bit spill)
- LaneBitmask SpilledLanes = LaneBitmask::getNone();
- for (LaneBitmask TouchedMask : EP.getTouchedLaneMasks()) {
- SpilledLanes |= TouchedMask;
- }
- assert((DefMask & SpilledLanes) == DefMask &&
- "DefMask must be a subset of the lanes that were spilled");
-
- LLVM_DEBUG(dbgs() << " DefMask=" << PrintLaneMask(DefMask)
- << " is subset of SpilledLanes=" << PrintLaneMask(SpilledLanes) << "\n");
-
- // Step 1: Index the reload instruction and get its SlotIndex
- SlotIndex ReloadIdx = indexNewInstr(ReloadMI);
- assert(ReloadIdx >= EP.getCutIdx() &&
- "Reload index must be >= cut index");
-
- // Step 2: Extract the new SSA register from the reload instruction
- // The caller should have already created NewVReg and built ReloadMI with it
- Register NewSSAVReg = ReloadMI.defs().begin()->getReg();
- assert(NewSSAVReg.isValid() && NewSSAVReg.isVirtual() &&
- "ReloadMI should define a valid virtual register");
-
- // Step 3: Create and extend NewSSAVReg's LiveInterval using captured EndPoints
- // The endpoints capture where the original register was live after the spill point
- // We need to reconstruct this liveness for the new SSA register
- LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewSSAVReg);
-
- // Extend main live range using the captured endpoints
- if (!EP.getMainEndPoints().empty()) {
- LIS.extendToIndices(NewLI, EP.getMainEndPoints());
- LLVM_DEBUG(dbgs() << " Extended NewSSA main range with " << EP.getMainEndPoints().size()
- << " endpoints\n");
- }
-
- // Extend subranges for lane-aware liveness reconstruction
- // Create subranges on-demand for each LaneMask that was captured during spill
- for (const auto &[LaneMask, EndPoints] : EP.getSubrangeEndPoints()) {
- if (!EndPoints.empty()) {
- // Always create a new subrange since NewLI.subranges() is initially empty
- LiveInterval::SubRange *NewSR = NewLI.createSubRange(LIS.getVNInfoAllocator(), LaneMask);
-
- LIS.extendToIndices(*NewSR, EndPoints);
- LLVM_DEBUG(dbgs() << " Created and extended NewSSA subrange " << PrintLaneMask(LaneMask)
- << " with " << EndPoints.size() << " endpoints\n");
- }
- }
-
- // Step 4: Perform common SSA repair (PHI placement + use rewriting)
- performSSARepair(NewSSAVReg, OrigVReg, DefMask, ReloadMI.getParent());
-
- LLVM_DEBUG(dbgs() << " SSA repair complete, returning " << NewSSAVReg << "\n");
- return NewSSAVReg;
-}
-
//===----------------------------------------------------------------------===//
// Common SSA Repair Logic
//===----------------------------------------------------------------------===//
@@ -871,12 +692,6 @@ VNInfo *MachineLaneSSAUpdater::incomingOnEdge(LiveInterval &LI, MachineInstr *Ph
/// Check if \p DefMI's definition reaches \p UseMI's use operand.
/// During SSA reconstruction, LiveIntervals may not be complete yet, so we use
/// dominance-based checking rather than querying LiveInterval reachability.
-///
-/// TODO: This dominance-based approach doesn't handle back edges correctly.
-/// For loop back edges, the definition in the loop body doesn't dominate the
-/// loop header PHI's predecessor, but the value does reach the PHI operand.
-/// We need proper reachability analysis (e.g., checking if there's a path from
-/// DefMI to the predecessor block) to handle loops correctly.
bool MachineLaneSSAUpdater::defReachesUse(MachineInstr *DefMI,
MachineInstr *UseMI,
MachineOperand &UseOp) {
@@ -1018,8 +833,6 @@ Register MachineLaneSSAUpdater::buildRSForSuperUse(MachineInstr *UseMI, MachineO
// Add source for lanes from OldVR (unchanged lanes)
// Handle both contiguous and non-contiguous lane masks
// Non-contiguous example: Redefining only sub2 of vreg_128 leaves LanesFromOld = sub0+sub1+sub3
- // Reference: getCoveringSubRegsForLaneMask from AMDGPUSSARAUtils.h (PR #156049)
- // See: https://github.com/llvm/llvm-project/pull/156049/files#diff-b52a7e2e5b6c174847c74c25b3b579f8cfbac5d53c3364b9b69c52de71532aec
if (LanesFromOld.any()) {
unsigned SubIdx = getSubRegIndexForLaneMask(LanesFromOld, &TRI);
diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt
index 11b031d8f4c58..e31c012e639bb 100644
--- a/llvm/unittests/CodeGen/CMakeLists.txt
+++ b/llvm/unittests/CodeGen/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_unittest(CodeGenTests
MachineInstrBundleIteratorTest.cpp
MachineInstrTest.cpp
MachineLaneSSAUpdaterTest.cpp
+ MachineLaneSSAUpdaterSpillReloadTest.cpp
MachineOperandTest.cpp
RegAllocScoreTest.cpp
PassManagerTest.cpp
diff --git a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterSpillReloadTest.cpp b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterSpillReloadTest.cpp
new file mode 100644
index 0000000000000..81a1f7703f2a8
--- /dev/null
+++ b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterSpillReloadTest.cpp
@@ -0,0 +1,332 @@
+//===- MachineLaneSSAUpdaterSpillReloadTest.cpp - Spill/Reload tests -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Unit tests for MachineLaneSSAUpdater focusing on spill/reload scenarios.
+//
+// NOTE: Analysis showed that repairSSAForNewDef() is sufficient for
+// spill/reload scenarios - no special spill handling is needed. The spiller
+// workflow is:
+// 1. Insert reload instruction before use
+// 2. Call repairSSAForNewDef(ReloadMI, SpilledReg)
+// 3. Done! Uses are rewritten, LiveIntervals naturally pruned
+//
+// This file contains a basic linear spill/reload test demonstrating this;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLaneSSAUpdater.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+// TestPass needs to be defined outside anonymous namespace for INITIALIZE_PASS
+struct SpillReloadTestPass : public MachineFunctionPass {
+ static char ID;
+ SpillReloadTestPass() : MachineFunctionPass(ID) {}
+};
+
+char SpillReloadTestPass::ID = 0;
+
+namespace llvm {
+ void initializeSpillReloadTestPassPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(SpillReloadTestPass, "spillreloadtestpass",
+ "spillreloadtestpass", false, false)
+
+namespace {
+
+void initLLVM() {
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+
+ PassRegistry *Registry = PassRegistry::getPassRegistry();
+ initializeCore(*Registry);
+ initializeCodeGen(*Registry);
+}
+
+// Helper to create a target machine for AMDGPU
+std::unique_ptr<TargetMachine> createTargetMachine() {
+ Triple TT("amdgcn--");
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget("", TT, Error);
+ if (!T)
+ return nullptr;
+
+ TargetOptions Options;
+ return std::unique_ptr<TargetMachine>(
+ T->createTargetMachine(TT, "gfx900", "", Options, std::nullopt,
+ std::nullopt, CodeGenOptLevel::Aggressive));
+}
+
+// Helper to parse MIR string with legacy PassManager
+std::unique_ptr<Module> parseMIR(LLVMContext &Context,
+ legacy::PassManagerBase &PM,
+ std::unique_ptr<MIRParser> &MIR,
+ const TargetMachine &TM, StringRef MIRCode) {
+ SMDiagnostic Diagnostic;
+ std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
+ MIR = createMIRParser(std::move(MBuffer), Context);
+ if (!MIR)
+ return nullptr;
+
+ std::unique_ptr<Module> M = MIR->parseIRModule();
+ if (!M)
+ return nullptr;
+
+ M->setDataLayout(TM.createDataLayout());
+
+ MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(&TM);
+ if (MIR->parseMachineFunctions(*M, MMIWP->getMMI()))
+ return nullptr;
+ PM.add(MMIWP);
+
+ return M;
+}
+
+template <typename AnalysisType>
+struct SpillReloadTestPassT : public SpillReloadTestPass {
+ typedef std::function<void(MachineFunction&, AnalysisType&)> TestFx;
+
+ SpillReloadTestPassT() {
+ // We should never call this but always use PM.add(new SpillReloadTestPassT<...>(T, ShouldPass))
+ abort();
+ }
+
+ SpillReloadTestPassT(TestFx T, bool ShouldPass)
+ : T(T), ShouldPass(ShouldPass) {
+ initializeSpillReloadTestPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ AnalysisType &A = getAnalysis<AnalysisType>();
+ T(MF, A);
+ bool VerifyResult = MF.verify(this, /* Banner=*/nullptr,
+ /*OS=*/&llvm::errs(),
+ /* AbortOnError=*/false);
+ EXPECT_EQ(VerifyResult, ShouldPass);
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<AnalysisType>();
+ AU.addPreserved<AnalysisType>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ TestFx T;
+ bool ShouldPass;
+};
+
+template <typename AnalysisType>
+static void doTest(StringRef MIRFunc,
+ typename SpillReloadTestPassT<AnalysisType>::TestFx T,
+ bool ShouldPass = true) {
+ initLLVM();
+
+ LLVMContext Context;
+ std::unique_ptr<TargetMachine> TM = createTargetMachine();
+ if (!TM)
+ GTEST_SKIP() << "AMDGPU target not available";
+
+ legacy::PassManager PM;
+ std::unique_ptr<MIRParser> MIR;
+ std::unique_ptr<Module> M = parseMIR(Context, PM, MIR, *TM, MIRFunc);
+ ASSERT_TRUE(M);
+
+ PM.add(new SpillReloadTestPassT<AnalysisType>(T, ShouldPass));
+
+ PM.run(*M);
+}
+
+static void liveIntervalsTest(StringRef MIRFunc,
+ SpillReloadTestPassT<LiveIntervalsWrapperPass>::TestFx T,
+ bool ShouldPass = true) {
+ SmallString<512> S;
+ StringRef MIRString = (Twine(R"MIR(
+--- |
+ define amdgpu_kernel void @func() { ret void }
+...
+---
+name: func
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+body: |
+ bb.0:
+)MIR") + Twine(MIRFunc) + Twine("...\n")).toNullTerminatedStringRef(S);
+
+ doTest<LiveIntervalsWrapperPass>(MIRString, T, ShouldPass);
+}
+
+//===----------------------------------------------------------------------===//
+// Spill/Reload Tests
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Test 1: Simple Linear Spill/Reload
+//===----------------------------------------------------------------------===//
+//
+// This test demonstrates that repairSSAForNewDef() works for spill/reload
+// scenarios without any special handling.
+//
+// CFG Structure:
+// BB0 (entry)
+// | %0 = initial_def
+// |
+// BB1 (intermediate)
+// | some operations
+// |
+// BB2 (reload & use)
+// | %0 = RELOAD (simulated as V_MOV_B32)
+// | use %0
+//
+// Scenario:
+// - %0 is defined in BB0 and used in BB2
+// - Insert a reload instruction in BB2 that redefines %0 (violating SSA)
+// - Call repairSSAForNewDef() to fix the SSA violation
+// - Verify that uses are rewritten and LiveIntervals are correct
+//
+// Expected Behavior:
+// - Reload renamed to define a new register
+// - Uses after reload rewritten to new register
+// - OrigReg's LiveInterval naturally pruned to BB0 only
+// - No PHI needed (linear CFG)
+//
+TEST(MachineLaneSSAUpdaterSpillReloadTest, SimpleLinearSpillReload) {
+ liveIntervalsTest(R"MIR(
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ %1:vgpr_32 = V_MOV_B32_e32 100, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ %2:vgpr_32 = V_ADD_U32_e32 %0, %1, implicit $exec
+ S_ENDPGM 0
+)MIR",
+ [](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
+ LiveIntervals &LIS = LISWrapper.getLIS();
+ MachineDominatorTree MDT(MF);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Verify we have 3 blocks as expected
+ ASSERT_EQ(MF.size(), 3u) << "Should have bb.0, bb.1, bb.2";
+
+ MachineBasicBlock *BB0 = MF.getBlockNumbered(0);
+ MachineBasicBlock *BB2 = MF.getBlockNumbered(2);
+
+ // Find %0 definition in BB0 (first instruction should be V_MOV_B32)
+ MachineInstr *OrigDefMI = &*BB0->begin();
+ ASSERT_TRUE(OrigDefMI && OrigDefMI->getNumOperands() > 0);
+ Register OrigReg = OrigDefMI->getOperand(0).getReg();
+ ASSERT_TRUE(OrigReg.isValid()) << "Should have valid original register %0";
+
+ // STEP 1: Insert reload instruction in BB2 before the use
+ // This creates a second definition of %0, violating SSA
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ auto InsertPt = BB2->getFirstNonPHI();
+
+ // Get opcode and register from the existing V_MOV_B32 in BB0
+ unsigned MovOpcode = OrigDefMI->getOpcode();
+ Register ExecReg = OrigDefMI->getOperand(2).getReg();
+
+ // Insert reload: %0 = V_MOV_B32 999 (simulating load from stack)
+ // This violates SSA because %0 is already defined in BB0
+ MachineInstr *ReloadMI = BuildMI(*BB2, InsertPt, DebugLoc(),
+ TII->get(MovOpcode), OrigReg)
+ .addImm(999) // Simulated reload value
+ .addReg(ExecReg, RegState::Implicit);
+
+ // Set MachineFunction properties to allow SSA
+ MF.getProperties().set(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
+ // STEP 2: Call repairSSAForNewDef to fix the SSA violation
+ // This will:
+ // - Rename the reload to define a new register
+ // - Rewrite uses dominated by the reload
+ // - Naturally prune OrigReg's LiveInterval via recomputation
+ MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
+ Register ReloadReg = Updater.repairSSAForNewDef(*ReloadMI, OrigReg);
+
+ // VERIFY RESULTS:
+
+ // 1. ReloadReg should be valid and different from OrigReg
+ EXPECT_TRUE(ReloadReg.isValid()) << "Updater should return valid register";
+ EXPECT_NE(ReloadReg, OrigReg) << "Reload register should be different from original";
+
+ // 2. ReloadMI should define the new ReloadReg (not OrigReg)
+ EXPECT_EQ(ReloadMI->getOperand(0).getReg(), ReloadReg)
+ << "ReloadMI should define new reload register";
+
+ // 3. Verify the ReloadReg has a valid LiveInterval
+ EXPECT_TRUE(LIS.hasInterval(ReloadReg))
+ << "Reload register should have live interval";
+
+ // 4. No PHI should be inserted (linear CFG, reload dominates subsequent uses)
+ bool FoundPHI = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.isPHI()) {
+ FoundPHI = true;
+ break;
+ }
+ }
+ }
+ EXPECT_FALSE(FoundPHI)
+ << "Linear CFG should not require PHI nodes";
+
+ // 5. Verify OrigReg's LiveInterval was naturally pruned
+ // It should only cover BB0 now (definition to end of block)
+ EXPECT_TRUE(LIS.hasInterval(OrigReg))
+ << "Original register should still have live interval";
+ const LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+
+ // The performSSARepair recomputation naturally prunes OrigReg
+ // because all uses in BB2 were rewritten to ReloadReg
+ SlotIndex OrigEnd = OrigLI.endIndex();
+
+ // OrigReg should not extend into BB2 where ReloadReg took over
+ SlotIndex BB2Start = LIS.getMBBStartIdx(BB2);
+ EXPECT_LE(OrigEnd, BB2Start)
+ << "Original register should not extend into BB2 after reload";
+ });
+}
+
+} // anonymous namespace
+
>From 92129636f51a473b1f2db53daddfe9df73ab26d1 Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Wed, 15 Oct 2025 16:39:37 +0000
Subject: [PATCH 7/7] [CodeGen] MachineLaneSSAUpdater Unit test debug output
fixed
---
.../CodeGen/MachineLaneSSAUpdaterTest.cpp | 328 ++++++++----------
1 file changed, 151 insertions(+), 177 deletions(-)
diff --git a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
index ffb49b0b6bb9e..172cbf33dce2f 100644
--- a/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
+++ b/llvm/unittests/CodeGen/MachineLaneSSAUpdaterTest.cpp
@@ -32,6 +32,8 @@
#include "llvm/TargetParser/Triple.h"
#include "gtest/gtest.h"
+#define DEBUG_TYPE "machine-lane-ssa-updater-test"
+
using namespace llvm;
// TestPass needs to be defined outside anonymous namespace for INITIALIZE_PASS
@@ -297,13 +299,9 @@ TEST(MachineLaneSSAUpdaterTest, NewDefInsertsPhiAndRewritesUses) {
EXPECT_TRUE(LIS.hasInterval(NewReg)) << "New register should have live interval";
EXPECT_TRUE(LIS.hasInterval(OrigReg)) << "Original register should still have live interval";
- // Note: MachineFunction verification happens in TestPassT::runOnMachineFunction
- // If verification fails, print the MachineFunction for debugging
- if (!MF.verify(nullptr, /* Banner=*/nullptr, /*OS=*/nullptr, /* AbortOnError=*/false)) {
- llvm::errs() << "MachineFunction verification failed after SSA repair:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid after SSA repair
+ EXPECT_TRUE(MF.verify(nullptr, /* Banner=*/nullptr, /*OS=*/nullptr, /* AbortOnError=*/false))
+ << "MachineFunction verification failed after SSA repair";
});
}
@@ -405,7 +403,7 @@ TEST(MachineLaneSSAUpdaterTest, MultiplePhiInsertion) {
++UseCountBefore;
}
ASSERT_GT(UseCountBefore, 0u) << "Original register should have uses";
- llvm::errs() << "Original register has " << UseCountBefore << " uses before SSA repair\n";
+ LLVM_DEBUG(dbgs() << "Original register has " << UseCountBefore << " uses before SSA repair\n");
// Get V_MOV opcode from bb.0
MachineBasicBlock *BB0 = MF.getBlockNumbered(0);
@@ -442,8 +440,8 @@ TEST(MachineLaneSSAUpdaterTest, MultiplePhiInsertion) {
if (MI.isPHI()) {
++PHICount;
++BlockPHIs;
- llvm::errs() << "Found PHI in BB#" << MBB.getNumber() << ": ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found PHI in BB#" << MBB.getNumber() << ": ");
+ LLVM_DEBUG(MI.print(dbgs()));
}
}
if (BlockPHIs > 0) {
@@ -451,15 +449,15 @@ TEST(MachineLaneSSAUpdaterTest, MultiplePhiInsertion) {
}
}
- llvm::errs() << "Total PHI nodes inserted: " << PHICount << "\n";
+ LLVM_DEBUG(dbgs() << "Total PHI nodes inserted: " << PHICount << "\n");
// Check for first PHI in bb.6 (joins bb.4 and bb.5)
bool FoundPHIInBB6 = false;
for (MachineInstr &MI : *BB6) {
if (MI.isPHI()) {
FoundPHIInBB6 = true;
- llvm::errs() << "First PHI in bb.6: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "First PHI in bb.6: ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Verify it has 2 incoming values (4 operands: 2 x (reg, mbb))
unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
EXPECT_EQ(NumIncoming, 2u) << "First PHI in bb.6 should have 2 incoming values (from bb.4 and bb.5)";
@@ -473,8 +471,8 @@ TEST(MachineLaneSSAUpdaterTest, MultiplePhiInsertion) {
for (MachineInstr &MI : *BB7) {
if (MI.isPHI()) {
FoundPHIInBB7 = true;
- llvm::errs() << "Second PHI in bb.7: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Second PHI in bb.7: ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Verify it has 2 incoming values (4 operands: 2 x (reg, mbb))
unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
EXPECT_EQ(NumIncoming, 2u) << "Second PHI in bb.7 should have 2 incoming values (from bb.2 with %1 and bb.6 with first PHI result)";
@@ -490,12 +488,9 @@ TEST(MachineLaneSSAUpdaterTest, MultiplePhiInsertion) {
EXPECT_TRUE(LIS.hasInterval(NewReg)) << "New register should have live interval";
EXPECT_TRUE(LIS.hasInterval(OrigReg)) << "Original register should have live interval";
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -587,18 +582,18 @@ TEST(MachineLaneSSAUpdaterTest, SubregisterLaneTracking) {
const TargetRegisterClass *RC64 = MRI.getRegClass(Reg64);
ASSERT_EQ(TRI->getRegSizeInBits(*RC64), 64u) << "Register %3 should be 64-bit";
- llvm::errs() << "Using 64-bit register: %" << Reg64.virtRegIndex() << " (raw: " << Reg64 << ")\n";
+ LLVM_DEBUG(dbgs() << "Using 64-bit register: %" << Reg64.virtRegIndex() << " (raw: " << Reg64 << ")\n");
// Verify it has subranges for lane tracking
ASSERT_TRUE(LIS.hasInterval(Reg64)) << "Register should have live interval";
LiveInterval &LI = LIS.getInterval(Reg64);
if (LI.hasSubRanges()) {
- llvm::errs() << "Register has subranges (lane tracking active)\n";
+ LLVM_DEBUG(dbgs() << "Register has subranges (lane tracking active)\n");
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- llvm::errs() << " Lane mask: " << PrintLaneMask(SR.LaneMask) << "\n";
+ LLVM_DEBUG(dbgs() << " Lane mask: " << PrintLaneMask(SR.LaneMask) << "\n");
}
} else {
- llvm::errs() << "Warning: Register does not have subranges\n";
+ LLVM_DEBUG(dbgs() << "Warning: Register does not have subranges\n");
}
// Find the subreg index for a 32-bit subreg of the 64-bit register
@@ -612,8 +607,8 @@ TEST(MachineLaneSSAUpdaterTest, SubregisterLaneTracking) {
}
ASSERT_NE(Sub0Idx, 0u) << "Could not find 32-bit subregister index";
LaneBitmask Sub0Mask = TRI->getSubRegIndexLaneMask(Sub0Idx);
- llvm::errs() << "Sub0 index=" << Sub0Idx << " (" << TRI->getSubRegIndexName(Sub0Idx)
- << "), mask=" << PrintLaneMask(Sub0Mask) << "\n";
+ LLVM_DEBUG(dbgs() << "Sub0 index=" << Sub0Idx << " (" << TRI->getSubRegIndexName(Sub0Idx)
+ << "), mask=" << PrintLaneMask(Sub0Mask) << "\n");
// Insert new definition in bb.3 that defines Reg64.sub0 (partial update, violating SSA)
// Use V_MOV with immediate - no liveness dependencies
@@ -655,7 +650,7 @@ TEST(MachineLaneSSAUpdaterTest, SubregisterLaneTracking) {
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, Reg64);
- llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << " (raw: " << NewReg << ")\n";
+ LLVM_DEBUG(dbgs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << " (raw: " << NewReg << ")\n");
// VERIFY RESULTS:
@@ -676,8 +671,8 @@ TEST(MachineLaneSSAUpdaterTest, SubregisterLaneTracking) {
for (MachineInstr &MI : *BB4) {
if (MI.isPHI()) {
FoundPHI = true;
- llvm::errs() << "Found PHI in bb.4: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found PHI in bb.4: ");
+ LLVM_DEBUG(MI.print(dbgs()));
break;
}
}
@@ -686,12 +681,9 @@ TEST(MachineLaneSSAUpdaterTest, SubregisterLaneTracking) {
// 4. Verify LiveIntervals are valid
EXPECT_TRUE(LIS.hasInterval(NewReg)) << "New register should have live interval";
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -843,7 +835,7 @@ TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, RegX);
- llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << " (raw: " << NewReg << ")\n";
+ LLVM_DEBUG(dbgs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << " (raw: " << NewReg << ")\n");
// VERIFY RESULTS:
@@ -864,8 +856,8 @@ TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
if (MI.isPHI()) {
FoundPHI = true;
PHIReg = MI.getOperand(0).getReg();
- llvm::errs() << "PHI in bb.7 after SSA repair: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "PHI in bb.7 after SSA repair: ");
+ LLVM_DEBUG(MI.print(dbgs()));
break;
}
}
@@ -877,8 +869,8 @@ TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
for (MachineInstr &MI : *BB6) {
if (MI.getOpcode() == TargetOpcode::REG_SEQUENCE) {
FoundREGSEQ = true;
- llvm::errs() << "Found REG_SEQUENCE in bb.6: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found REG_SEQUENCE in bb.6: ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Should combine new sub0 with original sub1
EXPECT_GE(MI.getNumOperands(), 5u) << "REG_SEQUENCE should have result + 2 source pairs";
@@ -891,12 +883,9 @@ TEST(MachineLaneSSAUpdaterTest, SubregDefToFullRegPHI) {
EXPECT_TRUE(LIS.hasInterval(NewReg));
EXPECT_TRUE(LIS.hasInterval(PHIReg));
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -985,7 +974,7 @@ TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
Register OrigReg = OrigDefMI->getOperand(0).getReg();
ASSERT_TRUE(OrigReg.isValid()) << "Could not get original register";
- llvm::errs() << "Original register: %" << OrigReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "Original register: %" << OrigReg.virtRegIndex() << "\n");
// Insert new definition in loop body (bb.2)
// This violates SSA because %1 is defined both in bb.0 and bb.2
@@ -1008,7 +997,7 @@ TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
- llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n");
// VERIFY RESULTS:
@@ -1024,8 +1013,8 @@ TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
for (MachineInstr &MI : *BB1) {
if (MI.isPHI()) {
FoundPHIInHeader = true;
- llvm::errs() << "Found PHI in loop header (bb.1): ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found PHI in loop header (bb.1): ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Verify PHI has 2 incoming values
unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
@@ -1044,11 +1033,11 @@ TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
if (IncomingMBB == BB0) {
HasEntryPath = true;
EXPECT_EQ(IncomingReg, OrigReg) << "Entry path should use OrigReg";
- llvm::errs() << " Entry path (bb.0): %" << IncomingReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << " Entry path (bb.0): %" << IncomingReg.virtRegIndex() << "\n");
} else if (IncomingMBB == BB2) {
HasBackEdge = true;
EXPECT_EQ(IncomingReg, NewReg) << "Back edge should use NewReg";
- llvm::errs() << " Back edge (bb.2): %" << IncomingReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << " Back edge (bb.2): %" << IncomingReg.virtRegIndex() << "\n");
}
}
@@ -1064,12 +1053,9 @@ TEST(MachineLaneSSAUpdaterTest, LoopWithDefInBody) {
EXPECT_TRUE(LIS.hasInterval(NewReg));
EXPECT_TRUE(LIS.hasInterval(OrigReg));
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -1194,7 +1180,7 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
Register OrigReg = OrigDefMI->getOperand(0).getReg();
ASSERT_TRUE(OrigReg.isValid()) << "Could not get original register X";
- llvm::errs() << "Original register X: %" << OrigReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "Original register X: %" << OrigReg.virtRegIndex() << "\n");
// Find the use-before-def in bb.1 (loop header)
MachineInstr *UseBeforeDefMI = nullptr;
@@ -1209,8 +1195,8 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
}
}
ASSERT_TRUE(UseBeforeDefMI) << "Could not find use-before-def in loop header";
- llvm::errs() << "Found use-before-def in bb.1: %"
- << UseBeforeDefMI->getOperand(0).getReg().virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "Found use-before-def in bb.1: %"
+ << UseBeforeDefMI->getOperand(0).getReg().virtRegIndex() << "\n");
// Insert new definition in bb.3 (else branch): X = 99
MachineInstr *MovInst = &*BB0->begin();
@@ -1232,7 +1218,7 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
- llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n");
// VERIFY RESULTS:
@@ -1248,8 +1234,8 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
if (MI.isPHI()) {
FoundPHI1 = true;
PHI1Reg = MI.getOperand(0).getReg();
- llvm::errs() << "Found PHI1 in diamond join (bb.4): ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found PHI1 in diamond join (bb.4): ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Should have 2 incoming: OrigReg from bb.2, NewReg from bb.3
unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
@@ -1266,7 +1252,7 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
if (MI.isPHI())
TotalPHICount++;
}
- llvm::errs() << "Total PHIs in loop header: " << TotalPHICount << "\n";
+ LLVM_DEBUG(dbgs() << "Total PHIs in loop header: " << TotalPHICount << "\n");
EXPECT_EQ(TotalPHICount, 2u) << "Loop header should have 2 PHIs (induction var + SSA repair)";
// Now find the SSA repair PHI (not the induction variable PHI %3)
@@ -1283,8 +1269,8 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
FoundPHI2 = true;
PHI2Reg = PHIResult;
- llvm::errs() << "Found PHI2 (SSA repair) in loop header (bb.1): ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found PHI2 (SSA repair) in loop header (bb.1): ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Should have 2 incoming: OrigReg from bb.0, PHI1Reg from bb.5
unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
@@ -1316,8 +1302,8 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
// 4. Use-before-def in bb.1 should be rewritten to PHI2
EXPECT_EQ(UseBeforeDefMI->getOperand(1).getReg(), PHI2Reg)
<< "Use-before-def should be rewritten to PHI2 result";
- llvm::errs() << "Use-before-def correctly rewritten to PHI2: %"
- << PHI2Reg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "Use-before-def correctly rewritten to PHI2: %"
+ << PHI2Reg.virtRegIndex() << "\n");
// 5. Use in latch (bb.5) should be rewritten to PHI1
// Find instruction using PHI1 (originally used %1)
@@ -1331,9 +1317,9 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isUse() && MO.getReg() == PHI1Reg) {
- llvm::errs() << "Latch use correctly rewritten to PHI1: %"
- << PHI1Reg.virtRegIndex() << " in: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Latch use correctly rewritten to PHI1: %"
+ << PHI1Reg.virtRegIndex() << " in: ");
+ LLVM_DEBUG(MI.print(dbgs()));
FoundLatchUse = true;
break;
}
@@ -1348,12 +1334,9 @@ TEST(MachineLaneSSAUpdaterTest, ComplexLoopWithDiamondAndUseBeforeDef) {
EXPECT_TRUE(LIS.hasInterval(PHI1Reg));
EXPECT_TRUE(LIS.hasInterval(PHI2Reg));
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -1442,7 +1425,7 @@ body: |
[](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
LiveIntervals &LIS = LISWrapper.getLIS();
MachineDominatorTree MDT(MF);
- llvm::errs() << "\n=== MultipleSubregRedefsInLoop Test ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== MultipleSubregRedefsInLoop Test ===\n");
// Get basic blocks
auto BBI = MF.begin();
@@ -1480,8 +1463,8 @@ body: |
ASSERT_NE(Sub0Idx, 0u) << "Should find sub0 index";
ASSERT_NE(Sub1Idx, 0u) << "Should find sub1 index";
- llvm::errs() << "Using 64-bit register: %" << OrigReg.virtRegIndex()
- << " with sub0=" << Sub0Idx << ", sub1=" << Sub1Idx << "\n";
+ LLVM_DEBUG(dbgs() << "Using 64-bit register: %" << OrigReg.virtRegIndex()
+ << " with sub0=" << Sub0Idx << ", sub1=" << Sub1Idx << "\n");
// Get V_MOV opcode and EXEC register from existing instruction
MachineInstr *MovInst = nullptr;
@@ -1497,7 +1480,7 @@ body: |
unsigned MovOpcode = MovInst->getOpcode();
// === FIRST INSERTION: X.sub0 in BB5 (else branch) ===
- llvm::errs() << "\n=== First insertion: X.sub0 in BB5 ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== First insertion: X.sub0 in BB5 ===\n");
// Find insertion point in BB5 (after the use of X.sub0)
MachineInstr *InsertPoint1 = nullptr;
@@ -1517,17 +1500,17 @@ body: |
.addReg(ExecReg, RegState::Implicit);
MachineInstr &NewDefMI1 = *MIB1;
- llvm::errs() << "Created first def in BB5: ";
- NewDefMI1.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Created first def in BB5: ");
+ LLVM_DEBUG(NewDefMI1.print(dbgs()));
// Create SSA updater and repair after first insertion
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg1 = Updater.repairSSAForNewDef(NewDefMI1, OrigReg);
- llvm::errs() << "SSA repair #1 created new register: %" << NewReg1.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "SSA repair #1 created new register: %" << NewReg1.virtRegIndex() << "\n");
// === SECOND INSERTION: X.sub1 in BB3 (after increment) ===
- llvm::errs() << "\n=== Second insertion: X.sub1 in BB3 (after increment) ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== Second insertion: X.sub1 in BB3 (after increment) ===\n");
// Find the increment instruction in BB3 (look for vreg_64 def)
MachineInstr *IncrementMI = nullptr;
@@ -1539,8 +1522,8 @@ body: |
if (DefReg.isVirtual() && DefReg == Register::index2VirtReg(3)) {
IncrementMI = &MI;
IncrementReg = DefReg; // This is %3
- llvm::errs() << "Found increment: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found increment: ");
+ LLVM_DEBUG(MI.print(dbgs()));
break;
}
}
@@ -1557,22 +1540,22 @@ body: |
.addReg(ExecReg, RegState::Implicit);
MachineInstr &NewDefMI2 = *MIB2;
- llvm::errs() << "Created second def in BB3 (redefining %3.sub1): ";
- NewDefMI2.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Created second def in BB3 (redefining %3.sub1): ");
+ LLVM_DEBUG(NewDefMI2.print(dbgs()));
// Repair SSA after second insertion (for %3, the increment result)
Register NewReg2 = Updater.repairSSAForNewDef(NewDefMI2, IncrementReg);
- llvm::errs() << "SSA repair #2 created new register: %" << NewReg2.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "SSA repair #2 created new register: %" << NewReg2.virtRegIndex() << "\n");
// === Verification ===
- llvm::errs() << "\n=== Verification ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== Verification ===\n");
// Print final MIR
- llvm::errs() << "Final BB3 (latch):\n";
- for (MachineInstr &MI : *BB3) {
- MI.print(llvm::errs());
- }
+ LLVM_DEBUG(dbgs() << "Final BB3 (latch):\n");
+ LLVM_DEBUG(for (MachineInstr &MI : *BB3) {
+ MI.print(dbgs());
+ });
// 1. Should have PHI for 32-bit X.sub0 at BB3 (diamond join)
bool FoundSub0PHI = false;
@@ -1581,8 +1564,8 @@ body: |
Register PHIResult = MI.getOperand(0).getReg();
if (PHIResult != Register::index2VirtReg(3)) { // Not the increment result PHI
FoundSub0PHI = true;
- llvm::errs() << "Found sub0 PHI in BB3: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found sub0 PHI in BB3: ");
+ LLVM_DEBUG(MI.print(dbgs()));
}
}
}
@@ -1593,8 +1576,8 @@ body: |
for (MachineInstr &MI : *BB3) {
if (MI.getOpcode() == TargetOpcode::REG_SEQUENCE) {
FoundREGSEQ = true;
- llvm::errs() << "Found REG_SEQUENCE in BB3: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found REG_SEQUENCE in BB3: ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Verify it composes both lanes
unsigned NumSources = (MI.getNumOperands() - 1) / 2;
@@ -1608,12 +1591,9 @@ body: |
EXPECT_TRUE(LIS.hasInterval(NewReg1));
EXPECT_TRUE(LIS.hasInterval(NewReg2));
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -1715,7 +1695,7 @@ body: |
[](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
LiveIntervals &LIS = LISWrapper.getLIS();
MachineDominatorTree MDT(MF);
- llvm::errs() << "\n=== NestedLoopsWithSSARepair Test ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== NestedLoopsWithSSARepair Test ===\n");
// Get basic blocks
auto BBI = MF.begin();
@@ -1733,7 +1713,7 @@ body: |
Register OrigReg = Register::index2VirtReg(0);
ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
- llvm::errs() << "Original register: %" << OrigReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "Original register: %" << OrigReg.virtRegIndex() << "\n");
// Get V_MOV opcode and EXEC register
MachineInstr *MovInst = &*BB0->begin();
@@ -1741,14 +1721,14 @@ body: |
Register ExecReg = MovInst->getOperand(2).getReg();
// Print initial state
- llvm::errs() << "\nInitial BB2 (inner loop header):\n";
+ LLVM_DEBUG(dbgs() << "\nInitial BB2 (inner loop header):\n");
for (MachineInstr &MI : *BB2) {
- MI.print(llvm::errs());
+ LLVM_DEBUG(MI.print(dbgs()));
}
- llvm::errs() << "\nInitial BB1 (outer loop header):\n";
+ LLVM_DEBUG(dbgs() << "\nInitial BB1 (outer loop header):\n");
for (MachineInstr &MI : *BB1) {
- MI.print(llvm::errs());
+ LLVM_DEBUG(MI.print(dbgs()));
}
// Insert new definition in BB3 (inner loop body)
@@ -1768,31 +1748,31 @@ body: |
.addImm(999)
.addReg(ExecReg, RegState::Implicit);
- llvm::errs() << "\nInserted new def in BB3 (inner loop body): ";
- NewDefMI->print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "\nInserted new def in BB3 (inner loop body): ");
+ LLVM_DEBUG(NewDefMI->print(dbgs()));
// Create SSA updater and repair
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
- llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n");
// === Verification ===
- llvm::errs() << "\n=== Verification ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== Verification ===\n");
- llvm::errs() << "\nFinal BB2 (inner loop header):\n";
+ LLVM_DEBUG(dbgs() << "\nFinal BB2 (inner loop header):\n");
for (MachineInstr &MI : *BB2) {
- MI.print(llvm::errs());
+ LLVM_DEBUG(MI.print(dbgs()));
}
- llvm::errs() << "\nFinal BB1 (outer loop header):\n";
+ LLVM_DEBUG(dbgs() << "\nFinal BB1 (outer loop header):\n");
for (MachineInstr &MI : *BB1) {
- MI.print(llvm::errs());
+ LLVM_DEBUG(MI.print(dbgs()));
}
- llvm::errs() << "\nFinal BB4 (outer loop body after inner):\n";
+ LLVM_DEBUG(dbgs() << "\nFinal BB4 (outer loop body after inner):\n");
for (MachineInstr &MI : *BB4) {
- MI.print(llvm::errs());
+ LLVM_DEBUG(MI.print(dbgs()));
}
// 1. Inner loop header (BB2) should have NEW PHI created by SSA repair
@@ -1808,8 +1788,8 @@ body: |
if (IncomingMBB == BB3 && IncomingReg == NewReg) {
FoundSSARepairPHI = true;
SSARepairPHIReg = MI.getOperand(0).getReg();
- llvm::errs() << "Found SSA repair PHI in inner loop header: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found SSA repair PHI in inner loop header: ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Should have incoming from BB1 and BB3
unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
@@ -1828,8 +1808,8 @@ body: |
for (MachineInstr &MI : *BB1) {
if (MI.isPHI() && MI.getOperand(0).getReg() == Register::index2VirtReg(1)) {
FoundOuterPHI = true;
- llvm::errs() << "Found outer loop PHI: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found outer loop PHI: ");
+ LLVM_DEBUG(MI.print(dbgs()));
}
}
EXPECT_TRUE(FoundOuterPHI) << "Should find outer loop PHI in BB1";
@@ -1843,8 +1823,8 @@ body: |
Register UseReg = MI.getOperand(i).getReg();
if (UseReg.isVirtual()) {
FoundUseInBB4 = true;
- llvm::errs() << "Found use in BB4: %" << UseReg.virtRegIndex() << " in ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found use in BB4: %" << UseReg.virtRegIndex() << " in ");
+ LLVM_DEBUG(MI.print(dbgs()));
}
}
}
@@ -1855,12 +1835,9 @@ body: |
// 4. Verify LiveIntervals
EXPECT_TRUE(LIS.hasInterval(NewReg));
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -2003,7 +1980,7 @@ body: |
[](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
LiveIntervals &LIS = LISWrapper.getLIS();
MachineDominatorTree MDT(MF);
- llvm::errs() << "\n=== MultipleSubregUsesAcrossDiamonds Test ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== MultipleSubregUsesAcrossDiamonds Test ===\n");
// Get basic blocks
auto BBI = MF.begin();
@@ -2023,7 +2000,7 @@ body: |
Register OrigReg = Register::index2VirtReg(0);
ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
- llvm::errs() << "Using 128-bit register: %" << OrigReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "Using 128-bit register: %" << OrigReg.virtRegIndex() << "\n");
// Find sub2_3 subregister index (64-bit covering bits 64-127)
unsigned Sub2_3Idx = 0;
@@ -2035,9 +2012,9 @@ body: |
// sub2_3 should have mask 0xF0 (lanes for bits 64-127)
if (SubRegSize == 64 && (Mask.getAsInteger() & 0xF0) == 0xF0) {
Sub2_3Idx = Idx;
- llvm::errs() << "Found sub2_3 index: " << Idx
- << " (size=" << SubRegSize
- << ", mask=0x" << llvm::format("%X", Mask.getAsInteger()) << ")\n";
+ LLVM_DEBUG(dbgs() << "Found sub2_3 index: " << Idx
+ << " (size=" << SubRegSize
+ << ", mask=0x" << llvm::format("%X", Mask.getAsInteger()) << ")\n");
break;
}
}
@@ -2065,8 +2042,8 @@ body: |
.addDef(OrigReg, RegState::Define, Sub2_3Idx);
MachineInstr *NewDefMI = MIB.getInstr();
- llvm::errs() << "Inserted new def in BB4: ";
- NewDefMI->print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Inserted new def in BB4: ");
+ LLVM_DEBUG(NewDefMI->print(dbgs()));
// Index the new instruction
LIS.InsertMachineInstrInMaps(*NewDefMI);
@@ -2079,12 +2056,12 @@ body: |
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
- llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n");
// Print final state of key blocks
- llvm::errs() << "\nFinal BB5 (diamond1 join):\n";
+ LLVM_DEBUG(dbgs() << "\nFinal BB5 (diamond1 join):\n");
for (MachineInstr &MI : *BB5) {
- MI.print(llvm::errs());
+ LLVM_DEBUG(MI.print(dbgs()));
}
// Verify SSA repair results
@@ -2095,8 +2072,8 @@ body: |
if (MI.isPHI()) {
Register PHIResult = MI.getOperand(0).getReg();
if (PHIResult.isVirtual()) {
- llvm::errs() << "Found PHI in BB5: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found PHI in BB5: ");
+ LLVM_DEBUG(MI.print(dbgs()));
// Check that it has 2 incoming values
unsigned NumIncoming = (MI.getNumOperands() - 1) / 2;
@@ -2113,14 +2090,14 @@ body: |
if (IncomingMBB == BB4) {
HasNewRegFromBB4 = (IncomingReg == NewReg);
- llvm::errs() << " Incoming from BB4: %" << IncomingReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << " Incoming from BB4: %" << IncomingReg.virtRegIndex() << "\n");
} else if (IncomingMBB == BB3) {
// Should be %0.sub2_3 (the lanes we redefined)
- llvm::errs() << " Incoming from BB3: %" << IncomingReg.virtRegIndex();
+ LLVM_DEBUG(dbgs() << " Incoming from BB3: %" << IncomingReg.virtRegIndex());
if (IncomingSubReg) {
- llvm::errs() << "." << TRI->getSubRegIndexName(IncomingSubReg);
+ LLVM_DEBUG(dbgs() << "." << TRI->getSubRegIndexName(IncomingSubReg));
}
- llvm::errs() << "\n";
+ LLVM_DEBUG(dbgs() << "\n");
// Verify it's using sub2_3
if (IncomingReg == OrigReg && IncomingSubReg == Sub2_3Idx) {
@@ -2144,12 +2121,9 @@ body: |
LiveInterval &OrigLI = LIS.getInterval(OrigReg);
EXPECT_TRUE(OrigLI.hasSubRanges()) << "OrigReg should have subranges after partial redef";
- // Debug output if verification fails
- if (!MF.verify(nullptr, nullptr, nullptr, false)) {
- llvm::errs() << "MachineFunction verification failed:\n";
- MF.print(llvm::errs());
- LIS.print(llvm::errs());
- }
+ // Verify the MachineFunction is still valid
+ EXPECT_TRUE(MF.verify(nullptr, nullptr, nullptr, false))
+ << "MachineFunction verification failed";
});
}
@@ -2229,7 +2203,7 @@ body: |
[](MachineFunction &MF, LiveIntervalsWrapperPass &LISWrapper) {
LiveIntervals &LIS = LISWrapper.getLIS();
MachineDominatorTree MDT(MF);
- llvm::errs() << "\n=== NonContiguousLaneMaskREGSEQUENCE Test ===\n";
+ LLVM_DEBUG(dbgs() << "\n=== NonContiguousLaneMaskREGSEQUENCE Test ===\n");
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
@@ -2248,7 +2222,7 @@ body: |
// Find %0 (the vreg_128)
Register OrigReg = Register::index2VirtReg(0);
ASSERT_TRUE(OrigReg.isValid()) << "Register %0 should be valid";
- llvm::errs() << "Using 128-bit register: %" << OrigReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "Using 128-bit register: %" << OrigReg.virtRegIndex() << "\n");
// Find sub1 subregister index
unsigned Sub1Idx = 0;
@@ -2268,8 +2242,8 @@ body: |
.addDef(OrigReg, RegState::Define, Sub1Idx);
MachineInstr *NewDefMI = MIB.getInstr();
- llvm::errs() << "Inserted new def in BB3: ";
- NewDefMI->print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Inserted new def in BB3: ");
+ LLVM_DEBUG(NewDefMI->print(dbgs()));
// Index the new instruction
LIS.InsertMachineInstrInMaps(*NewDefMI);
@@ -2282,12 +2256,12 @@ body: |
MachineLaneSSAUpdater Updater(MF, LIS, MDT, *TRI);
Register NewReg = Updater.repairSSAForNewDef(*NewDefMI, OrigReg);
- llvm::errs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n";
+ LLVM_DEBUG(dbgs() << "SSA repair created new register: %" << NewReg.virtRegIndex() << "\n");
// Print final state
- llvm::errs() << "\nFinal BB4 (diamond join):\n";
+ LLVM_DEBUG(dbgs() << "\nFinal BB4 (diamond join):\n");
for (MachineInstr &MI : *BB4) {
- MI.print(llvm::errs());
+ LLVM_DEBUG(MI.print(dbgs()));
}
// Verify SSA repair results
@@ -2299,8 +2273,8 @@ body: |
if (MI.isPHI()) {
PHIReg = MI.getOperand(0).getReg();
if (PHIReg.isVirtual()) {
- llvm::errs() << "Found PHI in BB4: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found PHI in BB4: ");
+ LLVM_DEBUG(MI.print(dbgs()));
FoundPHI = true;
// Check that it has 2 incoming values
@@ -2333,13 +2307,13 @@ body: |
for (MachineInstr &MI : *BB4) {
if (MI.getOpcode() == TargetOpcode::REG_SEQUENCE) {
- llvm::errs() << "Found REG_SEQUENCE: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found REG_SEQUENCE: ");
+ LLVM_DEBUG(MI.print(dbgs()));
FoundREGSEQUENCE = true;
// Count sources (each source is: register + subregidx, so pairs)
NumREGSEQSources = (MI.getNumOperands() - 1) / 2;
- llvm::errs() << " REG_SEQUENCE has " << NumREGSEQSources << " sources\n";
+ LLVM_DEBUG(dbgs() << " REG_SEQUENCE has " << NumREGSEQSources << " sources\n");
// We expect at least 2 sources for non-contiguous case:
// 1. PHI result covering sub1
@@ -2370,8 +2344,8 @@ body: |
if (MI.getOpcode() == TargetOpcode::COPY) {
MachineOperand &SrcOp = MI.getOperand(1);
if (SrcOp.isReg() && SrcOp.getReg().isVirtual() && SrcOp.getReg() != OrigReg) {
- llvm::errs() << "Found rewritten COPY: ";
- MI.print(llvm::errs());
+ LLVM_DEBUG(dbgs() << "Found rewritten COPY: ");
+ LLVM_DEBUG(MI.print(dbgs()));
FoundRewrittenUse = true;
break;
}
@@ -2381,12 +2355,12 @@ body: |
EXPECT_TRUE(FoundRewrittenUse) << "COPY should be rewritten to use REG_SEQUENCE result";
// Print summary
- llvm::errs() << "\n=== Test Summary ===\n";
- llvm::errs() << "✓ Redefined sub1 (middle lane) of vreg_128\n";
- llvm::errs() << "✓ Created PHI for sub1 lane\n";
- llvm::errs() << "✓ Created REG_SEQUENCE with " << NumREGSEQSources
- << " sources to handle non-contiguous lanes (sub0 + sub2 + sub3)\n";
- llvm::errs() << "✓ This test exercises getCoveringSubRegsForLaneMask!\n";
+ LLVM_DEBUG(dbgs() << "\n=== Test Summary ===\n");
+ LLVM_DEBUG(dbgs() << "✓ Redefined sub1 (middle lane) of vreg_128\n");
+ LLVM_DEBUG(dbgs() << "✓ Created PHI for sub1 lane\n");
+ LLVM_DEBUG(dbgs() << "✓ Created REG_SEQUENCE with " << NumREGSEQSources
+ << " sources to handle non-contiguous lanes (sub0 + sub2 + sub3)\n");
+ LLVM_DEBUG(dbgs() << "✓ This test exercises getCoveringSubRegsForLaneMask!\n");
});
}
More information about the llvm-commits
mailing list