[llvm] [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped (PR #184904)
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 5 14:59:44 PST 2026
https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/184904
From 462d4e5347b553e64f94bebf72fdd1671ae384f9 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Wed, 4 Mar 2026 19:42:17 -0500
Subject: [PATCH] [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped after
S_SETREG_IMM32_B32 (MODE)
On GFX1250, S_SET_VGPR_MSB is silently dropped when immediately following
S_SETREG_IMM32_B32 targeting the MODE register.
For Case 2 (size > 12), where imm32[12:19] is part of the MODE value and
cannot be freely modified, GCNHazardRecognizer predicts whether
AMDGPULowerVGPREncoding will place S_SET_VGPR_MSB after the setreg and
inserts S_NOPs to prevent the hazard. AMDGPULowerVGPREncoding then skips
over these S_NOPs when placing S_SET_VGPR_MSB.
The prediction (willSetregNeedVGPRMSB) mirrors handleSetregMode's logic:
- If imm[12:19] matches the next VALU's MSB, no S_SET_VGPR_MSB is needed
- If no preceding high VGPRs are in use, handleSetregMode won't insert an
  S_SET_VGPR_MSB
- Otherwise, S_NOPs are inserted to separate the two instructions
Shared MSB computation utilities (OpMode, ModeTy, computeMode, etc.) are
extracted into AMDGPUVGPREncoding.h for use by both passes.
The number of S_NOPs is configurable via -amdgpu-setreg-vgpr-msb-nops
(default: 1). A debug assertion verifies no back-to-back setreg +
S_SET_VGPR_MSB remains after lowering.
---
.../Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 216 +++++----------
llvm/lib/Target/AMDGPU/AMDGPUVGPREncoding.h | 253 ++++++++++++++++++
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 34 +++
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
.../AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir | 117 ++++++++
.../CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir | 4 +-
6 files changed, 475 insertions(+), 150 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUVGPREncoding.h
create mode 100644 llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 3aa4f1893698a..822e70c623a43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -42,10 +42,12 @@
#include "AMDGPULowerVGPREncoding.h"
#include "AMDGPU.h"
+#include "AMDGPUVGPREncoding.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/bit.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -54,63 +56,9 @@ using namespace llvm;
namespace {
-class AMDGPULowerVGPREncoding {
- static constexpr unsigned OpNum = 4;
- static constexpr unsigned BitsPerField = 2;
- static constexpr unsigned NumFields = 4;
- static constexpr unsigned ModeWidth = NumFields * BitsPerField;
- static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
- static constexpr unsigned VGPRMSBShift =
- llvm::countr_zero_constexpr<unsigned>(AMDGPU::Hwreg::DST_VGPR_MSB);
-
- struct OpMode {
- // No MSBs set means they are not required to be of a particular value.
- std::optional<unsigned> MSBits;
-
- bool update(const OpMode &New, bool &Rewritten) {
- bool Updated = false;
- if (New.MSBits) {
- if (*New.MSBits != MSBits.value_or(0)) {
- Updated = true;
- Rewritten |= MSBits.has_value();
- }
- MSBits = New.MSBits;
- }
- return Updated;
- }
- };
-
- struct ModeTy {
- OpMode Ops[OpNum];
-
- bool update(const ModeTy &New, bool &Rewritten) {
- bool Updated = false;
- for (unsigned I : seq(OpNum))
- Updated |= Ops[I].update(New.Ops[I], Rewritten);
- return Updated;
- }
-
- unsigned encode() const {
- // Layout: [src0 msb, src1 msb, src2 msb, dst msb].
- unsigned V = 0;
- for (const auto &[I, Op] : enumerate(Ops))
- V |= Op.MSBits.value_or(0) << (I * 2);
- return V;
- }
-
- // Check if this mode is compatible with required \p NewMode without
- // modification.
- bool isCompatible(const ModeTy NewMode) const {
- for (unsigned I : seq(OpNum)) {
- if (!NewMode.Ops[I].MSBits.has_value())
- continue;
- if (Ops[I].MSBits.value_or(0) != NewMode.Ops[I].MSBits.value_or(0))
- return false;
- }
- return true;
- }
- };
+using namespace AMDGPU::VGPREncoding;
+class AMDGPULowerVGPREncoding {
public:
bool run(MachineFunction &MF);
@@ -150,20 +98,9 @@ class AMDGPULowerVGPREncoding {
setMode(Mode, I);
}
- /// If \p MO references VGPRs, return the MSBs. Otherwise, return nullopt.
- std::optional<unsigned> getMSBs(const MachineOperand &MO) const;
-
/// Handle single \p MI. \return true if changed.
bool runOnMachineInstr(MachineInstr &MI);
- /// Compute the mode for a single \p MI given \p Ops operands
- /// bit mapping. Optionally takes second array \p Ops2 for VOPD.
- /// If provided and an operand from \p Ops is not a VGPR, then \p Ops2
- /// is checked.
- void computeMode(ModeTy &NewMode, const MachineInstr &MI,
- const AMDGPU::OpName Ops[OpNum],
- const AMDGPU::OpName *Ops2 = nullptr);
-
/// Check if an instruction \p I is within a clause and returns a suitable
/// iterator to insert mode change. It may also modify the S_CLAUSE
/// instruction to extend it or drop the clause if it cannot be adjusted.
@@ -182,6 +119,10 @@ class AMDGPULowerVGPREncoding {
/// new one was inserted.
bool handleSetregMode(MachineInstr &MI);
+ /// Set CurrentMode from an encoded mode value (as returned by
+ /// ModeTy::encode / computeNextMSB).
+ void setCurrentModeFromEncoded(int64_t Encoded);
+
/// Update bits[12:19] of the imm operand in S_SETREG_IMM32_B32 to contain
/// the VGPR MSB mode value. \returns true if the immediate was changed.
bool updateSetregModeImm(MachineInstr &MI, int64_t ModeValue);
@@ -222,76 +163,15 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
return true;
}
-std::optional<unsigned>
-AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const {
- if (!MO.isReg())
- return std::nullopt;
-
- MCRegister Reg = MO.getReg();
- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
- if (!RC || !TRI->isVGPRClass(RC))
- return std::nullopt;
-
- unsigned Idx = TRI->getHWRegIndex(Reg);
- return Idx >> 8;
-}
-
-void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode,
- const MachineInstr &MI,
- const AMDGPU::OpName Ops[OpNum],
- const AMDGPU::OpName *Ops2) {
- NewMode = {};
-
- for (unsigned I = 0; I < OpNum; ++I) {
- const MachineOperand *Op = TII->getNamedOperand(MI, Ops[I]);
-
- std::optional<unsigned> MSBits;
- if (Op)
- MSBits = getMSBs(*Op);
-
-#if !defined(NDEBUG)
- if (MSBits.has_value() && Ops2) {
- const MachineOperand *Op2 = TII->getNamedOperand(MI, Ops2[I]);
- if (Op2) {
- std::optional<unsigned> MSBits2;
- MSBits2 = getMSBs(*Op2);
- if (MSBits2.has_value() && MSBits != MSBits2)
- llvm_unreachable("Invalid VOPD pair was created");
- }
- }
-#endif
-
- if (!MSBits.has_value() && Ops2) {
- Op = TII->getNamedOperand(MI, Ops2[I]);
- if (Op)
- MSBits = getMSBs(*Op);
- }
-
- if (!MSBits.has_value())
- continue;
-
- // Skip tied uses of src2 of VOP2, these will be handled along with defs and
- // only vdst bit affects these operands. We cannot skip tied uses of VOP3,
- // these uses are real even if must match the vdst.
- if (Ops[I] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied() &&
- (SIInstrInfo::isVOP2(MI) ||
- (SIInstrInfo::isVOP3(MI) &&
- TII->hasVALU32BitEncoding(MI.getOpcode()))))
- continue;
-
- NewMode.Ops[I].MSBits = MSBits.value();
- }
-}
-
bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
auto Ops = AMDGPU::getVGPRLoweringOperandTables(MI.getDesc());
if (Ops.first) {
ModeTy NewMode;
- computeMode(NewMode, MI, Ops.first, Ops.second);
+ computeMode(NewMode, MI, Ops.first, Ops.second, *TII, *TRI);
if (!CurrentMode.isCompatible(NewMode) && MI.isCommutable() &&
TII->commuteInstruction(MI)) {
ModeTy NewModeCommuted;
- computeMode(NewModeCommuted, MI, Ops.first, Ops.second);
+ computeMode(NewModeCommuted, MI, Ops.first, Ops.second, *TII, *TRI);
if (CurrentMode.isCompatible(NewModeCommuted)) {
// Update CurrentMode with mode bits the commuted instruction relies on.
// This prevents later instructions from piggybacking and corrupting
@@ -369,15 +249,6 @@ AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
return I;
}
-/// Convert mode value from S_SET_VGPR_MSB format to MODE register format.
-/// S_SET_VGPR_MSB uses: (src0[0-1], src1[2-3], src2[4-5], dst[6-7])
-/// MODE register uses: (dst[0-1], src0[2-3], src1[4-5], src2[6-7])
-/// This is a left rotation by 2 bits on an 8-bit value.
-static int64_t convertModeToSetregFormat(int64_t Mode) {
- assert(isUInt<8>(Mode) && "Mode expected to be 8-bit");
- return llvm::rotl<uint8_t>(static_cast<uint8_t>(Mode), /*R=*/2);
-}
-
bool AMDGPULowerVGPREncoding::updateSetregModeImm(MachineInstr &MI,
int64_t ModeValue) {
assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
@@ -423,30 +294,54 @@ bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
// Case 2: Size > 12 - the original instruction uses bits beyond 11, so we
// cannot arbitrarily modify imm32[12:19]. Check if it already matches VGPR
- // MSBs. Note: imm32[12:19] is in MODE register format, while ModeValue is
- // in S_SET_VGPR_MSB format, so we need to convert before comparing.
+ // MSBs (current or next). Note: imm32[12:19] is in MODE register format,
+ // while ModeValue is in S_SET_VGPR_MSB format, so we need to convert before
+ // comparing.
MachineOperand *ImmOp = TII->getNamedOperand(MI, AMDGPU::OpName::imm);
assert(ImmOp && "ImmOp must be present");
int64_t ImmBits12To19 = (ImmOp->getImm() & VGPR_MSB_MASK) >> VGPRMSBShift;
int64_t SetregModeValue = convertModeToSetregFormat(ModeValue);
- if (ImmBits12To19 == SetregModeValue) {
- // Already correct, but we must invalidate MostRecentModeSet because this
- // instruction will overwrite mode[12:19]. We can't update this instruction
- // via piggybacking (bits[12:19] are meaningful), so if CurrentMode changes,
- // a new s_set_vgpr_msb will be inserted after this instruction.
+ if (ModeValue == 0 || ImmBits12To19 == SetregModeValue) {
+ LLVM_DEBUG(dbgs() << " Setreg MODE (size=" << Size
+ << "): imm[12:19] matches current MSB " << ModeValue
+ << "\n");
MostRecentModeSet = nullptr;
return false;
}
- // imm32[12:19] doesn't match VGPR MSBs - insert s_set_vgpr_msb after
- // the original instruction to restore the correct value.
- MachineBasicBlock::iterator InsertPt = std::next(MI.getIterator());
+ // Check if imm32[12:19] matches the next instruction's MSB. If so, the
+ // setreg already sets the correct MSB for what follows - no s_set_vgpr_msb
+ // needed, avoiding the hazard entirely.
+ if (auto NextMSB = computeNextMSB(MI.getIterator(), MBB, *TII, *TRI)) {
+ int64_t NextSetregMode = convertModeToSetregFormat(*NextMSB);
+ if (ImmBits12To19 == NextSetregMode) {
+ LLVM_DEBUG(dbgs() << " Setreg MODE (size=" << Size
+ << "): imm[12:19] matches next MSB " << *NextMSB
+ << "\n");
+ setCurrentModeFromEncoded(*NextMSB);
+ MostRecentModeSet = nullptr;
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " Setreg MODE (size=" << Size
+ << "): imm[12:19] mismatch, inserting "
+ "s_set_vgpr_msb (current MSB=" << ModeValue << ")\n");
+ MachineBasicBlock::instr_iterator InsertPt = std::next(MI.getIterator());
+ while (InsertPt != MBB->instr_end() &&
+ InsertPt->getOpcode() == AMDGPU::S_NOP)
+ ++InsertPt;
MostRecentModeSet = BuildMI(*MBB, InsertPt, MI.getDebugLoc(),
TII->get(AMDGPU::S_SET_VGPR_MSB))
.addImm(ModeValue);
return true;
}
+void AMDGPULowerVGPREncoding::setCurrentModeFromEncoded(int64_t Encoded) {
+ for (unsigned J = 0; J < OpNum; ++J)
+ CurrentMode.Ops[J].MSBits = (Encoded >> (J * BitsPerField)) & 0x3;
+}
+
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (!ST.has1024AddressableVGPRs())
@@ -506,6 +401,29 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
resetMode(MBB.instr_end());
}
+#ifndef NDEBUG
+ for (auto &MBB : MF) {
+ for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
+ if (I->getOpcode() != AMDGPU::S_SETREG_IMM32_B32)
+ continue;
+ using namespace AMDGPU::Hwreg;
+ const MachineOperand *SIMM16Op =
+ TII->getNamedOperand(*I, AMDGPU::OpName::simm16);
+ auto [HwRegId, Offset, Size] = HwregEncoding::decode(SIMM16Op->getImm());
+ (void)Offset;
+ (void)Size;
+ if (HwRegId != ID_MODE)
+ continue;
+ auto Next = std::next(I);
+ while (Next != E && Next->isMetaInstruction())
+ ++Next;
+ assert((Next == E || Next->getOpcode() != AMDGPU::S_SET_VGPR_MSB) &&
+ "s_set_vgpr_msb immediately after s_setreg_imm32_b32 (MODE) "
+ "creates a hardware hazard on GFX1250");
+ }
+ }
+#endif
+
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUVGPREncoding.h b/llvm/lib/Target/AMDGPU/AMDGPUVGPREncoding.h
new file mode 100644
index 0000000000000..bab88ee9baa0a
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUVGPREncoding.h
@@ -0,0 +1,253 @@
+//===--- AMDGPUVGPREncoding.h - VGPR MSB encoding utilities -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Shared utilities for VGPR MSB mode computation used by
+/// AMDGPULowerVGPREncoding and GCNHazardRecognizer.
+///
+/// On GFX1250, VGPRs above v255 require MSB mode bits to be set via
+/// S_SET_VGPR_MSB. These utilities compute the required MSB mode for
+/// a given instruction based on its VGPR operands.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUVGPRENCODING_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUVGPRENCODING_H
+
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/bit.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/MathExtras.h"
+#include <optional>
+
+namespace llvm {
+namespace AMDGPU {
+namespace VGPREncoding {
+
+static constexpr unsigned OpNum = 4;
+static constexpr unsigned BitsPerField = 2;
+static constexpr unsigned NumFields = 4;
+static constexpr unsigned ModeWidth = NumFields * BitsPerField;
+static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
+static constexpr unsigned VGPRMSBShift =
+ llvm::countr_zero_constexpr<unsigned>(AMDGPU::Hwreg::DST_VGPR_MSB);
+
+struct OpMode {
+ std::optional<unsigned> MSBits;
+
+ bool update(const OpMode &New, bool &Rewritten) {
+ bool Updated = false;
+ if (New.MSBits) {
+ if (*New.MSBits != MSBits.value_or(0)) {
+ Updated = true;
+ Rewritten |= MSBits.has_value();
+ }
+ MSBits = New.MSBits;
+ }
+ return Updated;
+ }
+};
+
+struct ModeTy {
+ OpMode Ops[OpNum];
+
+ bool update(const ModeTy &New, bool &Rewritten) {
+ bool Updated = false;
+ for (unsigned I : seq(OpNum))
+ Updated |= Ops[I].update(New.Ops[I], Rewritten);
+ return Updated;
+ }
+
+ unsigned encode() const {
+ unsigned V = 0;
+ for (const auto &[I, Op] : enumerate(Ops))
+ V |= Op.MSBits.value_or(0) << (I * 2);
+ return V;
+ }
+
+ bool isCompatible(const ModeTy NewMode) const {
+ for (unsigned I : seq(OpNum)) {
+ if (!NewMode.Ops[I].MSBits.has_value())
+ continue;
+ if (Ops[I].MSBits.value_or(0) != NewMode.Ops[I].MSBits.value_or(0))
+ return false;
+ }
+ return true;
+ }
+};
+
+/// Get the MSB bits for a register operand. Returns the high bits
+/// (index >> 8) if the operand is a VGPR, nullopt otherwise.
+inline std::optional<unsigned> getMSBs(const MachineOperand &MO,
+ const SIRegisterInfo &TRI) {
+ if (!MO.isReg())
+ return std::nullopt;
+
+ MCRegister Reg = MO.getReg();
+ const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(Reg);
+ if (!RC || !TRI.isVGPRClass(RC))
+ return std::nullopt;
+
+ unsigned Idx = TRI.getHWRegIndex(Reg);
+ return Idx >> 8;
+}
+
+/// Compute the VGPR MSB mode required by a single instruction.
+inline void computeMode(ModeTy &NewMode, const MachineInstr &MI,
+ const AMDGPU::OpName Ops[OpNum],
+ const AMDGPU::OpName *Ops2,
+ const SIInstrInfo &TII, const SIRegisterInfo &TRI) {
+ NewMode = {};
+
+ for (unsigned I = 0; I < OpNum; ++I) {
+ const MachineOperand *Op = TII.getNamedOperand(MI, Ops[I]);
+
+ std::optional<unsigned> MSBits;
+ if (Op)
+ MSBits = getMSBs(*Op, TRI);
+
+#if !defined(NDEBUG)
+ if (MSBits.has_value() && Ops2) {
+ const MachineOperand *Op2 = TII.getNamedOperand(MI, Ops2[I]);
+ if (Op2) {
+ std::optional<unsigned> MSBits2 = getMSBs(*Op2, TRI);
+ if (MSBits2.has_value() && MSBits != MSBits2)
+ llvm_unreachable("Invalid VOPD pair was created");
+ }
+ }
+#endif
+
+ if (!MSBits.has_value() && Ops2) {
+ Op = TII.getNamedOperand(MI, Ops2[I]);
+ if (Op)
+ MSBits = getMSBs(*Op, TRI);
+ }
+
+ if (!MSBits.has_value())
+ continue;
+
+ if (Ops[I] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied() &&
+ (SIInstrInfo::isVOP2(MI) ||
+ (SIInstrInfo::isVOP3(MI) &&
+ TII.hasVALU32BitEncoding(MI.getOpcode()))))
+ continue;
+
+ NewMode.Ops[I].MSBits = MSBits.value();
+ }
+}
+
+/// Convert mode value from S_SET_VGPR_MSB format to MODE register format.
+/// S_SET_VGPR_MSB uses: (src0[0-1], src1[2-3], src2[4-5], dst[6-7])
+/// MODE register uses: (dst[0-1], src0[2-3], src1[4-5], src2[6-7])
+/// This is a left rotation by 2 bits on an 8-bit value.
+inline int64_t convertModeToSetregFormat(int64_t Mode) {
+ assert(isUInt<8>(Mode) && "Mode expected to be 8-bit");
+ return llvm::rotl<uint8_t>(static_cast<uint8_t>(Mode), /*R=*/2);
+}
+
+/// Look ahead from iterator \p I in \p MBB to find the VGPR MSB mode needed
+/// by the next VALU instruction. Returns the encoded mode value, or
+/// std::nullopt if no VGPR instruction is found before block end or a
+/// terminator.
+inline std::optional<int64_t>
+computeNextMSB(MachineBasicBlock::instr_iterator I, MachineBasicBlock *MBB,
+ const SIInstrInfo &TII, const SIRegisterInfo &TRI) {
+ auto E = MBB->instr_end();
+ for (auto Next = std::next(I); Next != E; ++Next) {
+ if (Next->isMetaInstruction())
+ continue;
+ if (Next->isTerminator() || Next->isCall() || Next->isInlineAsm())
+ return std::nullopt;
+ auto Ops = AMDGPU::getVGPRLoweringOperandTables(Next->getDesc());
+ if (!Ops.first)
+ continue;
+ ModeTy NextMode;
+ computeMode(NextMode, *Next, Ops.first, Ops.second, TII, TRI);
+ bool HasVGPR = llvm::any_of(NextMode.Ops, [](const OpMode &Op) {
+ return Op.MSBits.has_value();
+ });
+ if (HasVGPR)
+ return NextMode.encode();
+ }
+ return std::nullopt;
+}
+
+/// Compute the accumulated VGPR MSB mode from the beginning of the block
+/// up to (not including) \p SetregI. This approximates what CurrentMode
+/// would be in AMDGPULowerVGPREncoding when it reaches the setreg.
+inline unsigned computeCurrentMSB(MachineBasicBlock::instr_iterator SetregI,
+ MachineBasicBlock *MBB,
+ const SIInstrInfo &TII,
+ const SIRegisterInfo &TRI) {
+ ModeTy CurrentMode;
+ for (auto I = MBB->instr_begin(); I != SetregI; ++I) {
+ if (I->isMetaInstruction())
+ continue;
+ if (I->isTerminator() || I->isCall())
+ break;
+ auto Ops = AMDGPU::getVGPRLoweringOperandTables(I->getDesc());
+ if (!Ops.first)
+ continue;
+ ModeTy NewMode;
+ computeMode(NewMode, *I, Ops.first, Ops.second, TII, TRI);
+ bool Unused = false;
+ CurrentMode.update(NewMode, Unused);
+ }
+ return CurrentMode.encode();
+}
+
+/// Predict whether AMDGPULowerVGPREncoding will place S_SET_VGPR_MSB
+/// immediately after an S_SETREG_IMM32_B32 (MODE) with size > 12.
+///
+/// The prediction mirrors handleSetregMode's logic:
+/// 1. If imm[12:19] matches the next VALU's MSB → no S_SET_VGPR_MSB (skip)
+/// 2. If CurrentMode is zero → handleSetregMode won't insert, but setMode
+/// for the next VALU might insert right after setreg if it needs high VGPRs
+/// 3. If CurrentMode is non-zero and imm[12:19] doesn't match → insert
+inline bool willSetregNeedVGPRMSB(MachineInstr &MI, MachineBasicBlock *MBB,
+ const SIInstrInfo &TII,
+ const SIRegisterInfo &TRI) {
+ assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
+
+ const MachineOperand *ImmOp = TII.getNamedOperand(MI, AMDGPU::OpName::imm);
+ assert(ImmOp && "ImmOp must be present");
+ int64_t ImmBits12To19 =
+ (ImmOp->getImm() & AMDGPU::Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
+
+ auto NextMSB = computeNextMSB(MI.getIterator(), MBB, TII, TRI);
+
+ // If the next VALU's MSB matches imm[12:19], AMDGPULowerVGPREncoding
+ // will detect this and skip S_SET_VGPR_MSB entirely.
+ if (NextMSB) {
+ int64_t NextSetregMode = convertModeToSetregFormat(*NextMSB);
+ if (ImmBits12To19 == NextSetregMode)
+ return false;
+ }
+
+ // Compute what CurrentMode would be at the setreg. If it's zero (no
+ // preceding high VGPRs) or imm[12:19] already matches it, handleSetregMode
+ // won't insert. But setMode for the next VALU may still insert right after
+ // setreg if the next VALU needs high VGPRs with a different mode.
+ unsigned CurrentMSB = computeCurrentMSB(MI.getIterator(), MBB, TII, TRI);
+ if (CurrentMSB == 0 ||
+ ImmBits12To19 == convertModeToSetregFormat(CurrentMSB))
+ return NextMSB && *NextMSB != 0;
+
+ return true;
+}
+
+} // namespace VGPREncoding
+} // namespace AMDGPU
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUVGPRENCODING_H
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 30a9d1d2ab149..caa3bbcbd9217 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "GCNHazardRecognizer.h"
+#include "AMDGPUVGPREncoding.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
@@ -63,6 +64,11 @@ static cl::opt<bool> EnableWMMAVnopHoisting(
"amdgpu-wmma-vnop-hoisting", cl::init(true), cl::Hidden,
cl::desc("Hoist WMMA hazard V_NOPs from loops to preheaders"));
+static cl::opt<unsigned> SetregModeToVGPRMSBHazardNOPs(
+ "amdgpu-setreg-vgpr-msb-nops", cl::init(1), cl::Hidden,
+ cl::desc("Number of s_nop cycles after s_setreg_imm32_b32 (MODE) "
+ "for VGPR MSB hazard on GFX1250 (emitted as s_nop N-1)"));
+
//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//
@@ -1308,6 +1314,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixScratchBaseForwardingHazard(MI);
if (ST.setRegModeNeedsVNOPs())
fixSetRegMode(MI);
+ if (ST.has1024AddressableVGPRs())
+ fixSetRegModeToVGPRMSBHazard(MI);
}
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3865,3 +3873,29 @@ bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
return true;
}
+
+bool GCNHazardRecognizer::fixSetRegModeToVGPRMSBHazard(MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::S_SETREG_IMM32_B32)
+ return false;
+
+ using namespace AMDGPU::Hwreg;
+ auto [HwRegId, Offset, Size] =
+ HwregEncoding::decode(MI->getOperand(1).getImm());
+ (void)Offset;
+ if (HwRegId != ID_MODE || Size <= 12)
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ const SIRegisterInfo &TRI = TII.getRegisterInfo();
+
+ if (!AMDGPU::VGPREncoding::willSetregNeedVGPRMSB(*MI, MBB, TII, TRI))
+ return false;
+
+ LLVM_DEBUG(dbgs() << " Inserting s_nop " << SetregModeToVGPRMSBHazardNOPs - 1
+ << " after s_setreg_imm32_b32 (MODE)"
+ << " for VGPR MSB hazard\n");
+ auto NextI = std::next(MI->getIterator());
+ BuildMI(*MBB, NextI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+ .addImm(SetregModeToVGPRMSBHazardNOPs - 1);
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index b331504d40113..52b54e8581f6b 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -137,6 +137,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
bool fixScratchBaseForwardingHazard(MachineInstr *MI);
bool fixSetRegMode(MachineInstr *MI);
+ bool fixSetRegModeToVGPRMSBHazard(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
new file mode 100644
index 0000000000000..66059ddd9de5b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
@@ -0,0 +1,117 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=post-RA-hazard-rec,amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+
+# Test handling of the GFX1250 hardware hazard where S_SET_VGPR_MSB immediately
+# after S_SETREG_IMM32_B32 (MODE) is silently dropped.
+#
+# GCNHazardRecognizer inserts S_NOPs for Case 2 (size > 12) when preceding
+# instructions use high VGPRs. AMDGPULowerVGPREncoding skips over these S_NOPs
+# when placing S_SET_VGPR_MSB.
+
+---
+# Case 2 mismatch: setreg (size=16) with imm32[12:19] that doesn't match
+# current VGPR MSB. GCNHazardRecognizer inserts S_NOPs, then
+# AMDGPULowerVGPREncoding places S_SET_VGPR_MSB after them.
+name: setreg_mode_size_gt_12_mismatch
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_mismatch
+ ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ ; imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23, doesn't match VGPR MSB mode)
+ S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ S_ENDPGM 0
+...
+
+---
+# Case 2 matches next MSB: setreg (size=16) with imm32[12:19] matching the
+# NEXT instruction's MSB. GCNHazardRecognizer predicts that
+# AMDGPULowerVGPREncoding will skip S_SET_VGPR_MSB (since imm matches next),
+# so no S_NOPs are inserted.
+#
+# v256/v257 -> mode 65, MODE format 5
+# v512/v513 -> mode 130, MODE format 10 = 0xA
+# imm32 = 0xAABC (bits 12:19 = 0xA = 10, matches next MSB)
+name: setreg_mode_size_gt_12_matches_next
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_matches_next
+ ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ ; imm32 = 0xAABC = 43708 (bits 12:19 = 0xA = 10, matches next MSB for v512/v513)
+ S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+ $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+# No hazard: S_SETREG_IMM32_B32 targeting non-MODE register.
+name: setreg_non_mode_no_hazard
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_non_mode_no_hazard
+ ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_SET_VGPR_MSB 16384, implicit-def $mode
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(STATUS, 2, 2): simm16 = 2 | (2 << 6) | (1 << 11) = 0x882 = 2178
+ S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+ S_ENDPGM 0
+...
+
+---
+# No hazard: Case 2 but no high VGPRs in the block at all.
+name: setreg_mode_size_gt_12_no_high_vgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_no_high_vgpr
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ S_ENDPGM 0
+...
+
+---
+# Case 2 with high VGPR only AFTER setreg: setreg (size=16) with low VGPRs
+# before but high VGPRs after. GCNHazardRecognizer predicts that the next
+# VALU needs high VGPRs and imm[12:19] doesn't match, so inserts S_NOPs.
+# AMDGPULowerVGPREncoding's setMode places S_SET_VGPR_MSB after the NOPs.
+name: setreg_mode_size_gt_12_high_vgpr_after
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_high_vgpr_after
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
index ecfc3cdcd215c..719ec9f093aae 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=post-RA-hazard-rec,amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
---
# Case 1a: Size < 12 (size=4), imm32[12:19]=0
@@ -94,6 +94,7 @@ body: |
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
@@ -231,6 +232,7 @@ body: |
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
More information about the llvm-commits
mailing list