[llvm] [AMDGPU] Refactor out common exec mask opcode patterns (NFCI) (PR #154718)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 21 03:08:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Carl Ritson (perlfu)
<details>
<summary>Changes</summary>
Create utility mechanism for finding wave size dependent opcodes used to manipulate exec/lane masks.
---
Patch is 70.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154718.diff
13 Files Affected:
- (added) llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.cpp (+75)
- (added) llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h (+52)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+13-23)
- (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (+1)
- (modified) llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (+6-9)
- (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (+9-12)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+9-15)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+38-68)
- (modified) llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp (+10-13)
- (modified) llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp (+41-71)
- (modified) llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp (+42-52)
- (modified) llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp (+11-18)
- (modified) llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp (+55-87)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.cpp
new file mode 100644
index 0000000000000..8690afcdeef6d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.cpp
@@ -0,0 +1,75 @@
+//===-- AMDGPULaneMaskUtils.cpp - Exec/lane mask helper functions ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPULaneMaskUtils.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+
+#define DEBUG_TYPE "amdgpu-lane-mask-utils"
+
+using namespace llvm;
+
+namespace llvm::AMDGPU {
+
+LaneMaskConstants::LaneMaskConstants(unsigned WavefrontSize) {
+ if (WavefrontSize == 32) {
+ ExecReg = AMDGPU::EXEC_LO;
+ VccReg = AMDGPU::VCC_LO;
+ AndOpc = AMDGPU::S_AND_B32;
+ AndTermOpc = AMDGPU::S_AND_B32_term;
+ AndN2Opc = AMDGPU::S_ANDN2_B32;
+ AndN2SaveExecOpc = AMDGPU::S_ANDN2_SAVEEXEC_B32;
+ AndN2TermOpc = AMDGPU::S_ANDN2_B32_term;
+ AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
+ AndSaveExecTermOpc = AMDGPU::S_AND_SAVEEXEC_B32_term;
+ BfmOpc = AMDGPU::S_BFM_B32;
+ CMovOpc = AMDGPU::S_CMOV_B32;
+ CSelectOpc = AMDGPU::S_CSELECT_B32;
+ MovOpc = AMDGPU::S_MOV_B32;
+ MovTermOpc = AMDGPU::S_MOV_B32_term;
+ OrOpc = AMDGPU::S_OR_B32;
+ OrTermOpc = AMDGPU::S_OR_B32_term;
+ OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
+ XorOpc = AMDGPU::S_XOR_B32;
+ XorTermOpc = AMDGPU::S_XOR_B32_term;
+ WQMOpc = AMDGPU::S_WQM_B32;
+ } else {
+ ExecReg = AMDGPU::EXEC;
+ VccReg = AMDGPU::VCC;
+ AndOpc = AMDGPU::S_AND_B64;
+ AndTermOpc = AMDGPU::S_AND_B64_term;
+ AndN2Opc = AMDGPU::S_ANDN2_B64;
+ AndN2SaveExecOpc = AMDGPU::S_ANDN2_SAVEEXEC_B64;
+ AndN2TermOpc = AMDGPU::S_ANDN2_B64_term;
+ AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
+ AndSaveExecTermOpc = AMDGPU::S_AND_SAVEEXEC_B64_term;
+ BfmOpc = AMDGPU::S_BFM_B64;
+ CMovOpc = AMDGPU::S_CMOV_B64;
+ CSelectOpc = AMDGPU::S_CSELECT_B64;
+ MovOpc = AMDGPU::S_MOV_B64;
+ MovTermOpc = AMDGPU::S_MOV_B64_term;
+ OrOpc = AMDGPU::S_OR_B64;
+ OrTermOpc = AMDGPU::S_OR_B64_term;
+ OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
+ XorOpc = AMDGPU::S_XOR_B64;
+ XorTermOpc = AMDGPU::S_XOR_B64_term;
+ WQMOpc = AMDGPU::S_WQM_B64;
+ }
+}
+
+static const LaneMaskConstants Wave32LaneMaskConstants(32);
+static const LaneMaskConstants Wave64LaneMaskConstants(64);
+
+const LaneMaskConstants &getLaneMaskConstants(const GCNSubtarget *ST) {
+ unsigned WavefrontSize = ST->getWavefrontSize();
+ assert(WavefrontSize == 32 || WavefrontSize == 64);
+ return WavefrontSize == 32 ? Wave32LaneMaskConstants
+ : Wave64LaneMaskConstants;
+}
+
+} // end namespace llvm::AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
new file mode 100644
index 0000000000000..6c11dbd73ef3b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
@@ -0,0 +1,52 @@
+//===- AMDGPULaneMaskUtils.h - Exec/lane mask helper functions -*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEMASKUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEMASKUTILS_H
+
+#include "llvm/CodeGen/Register.h"
+
+namespace llvm {
+
+class GCNSubtarget;
+
+namespace AMDGPU {
+
+class LaneMaskConstants {
+public:
+ Register ExecReg;
+ Register VccReg;
+ unsigned AndOpc;
+ unsigned AndTermOpc;
+ unsigned AndN2Opc;
+ unsigned AndN2SaveExecOpc;
+ unsigned AndN2TermOpc;
+ unsigned AndSaveExecOpc;
+ unsigned AndSaveExecTermOpc;
+ unsigned BfmOpc;
+ unsigned CMovOpc;
+ unsigned CSelectOpc;
+ unsigned MovOpc;
+ unsigned MovTermOpc;
+ unsigned OrOpc;
+ unsigned OrTermOpc;
+ unsigned OrSaveExecOpc;
+ unsigned XorOpc;
+ unsigned XorTermOpc;
+ unsigned WQMOpc;
+
+ LaneMaskConstants(unsigned WavefrontSize);
+};
+
+const LaneMaskConstants &getLaneMaskConstants(const GCNSubtarget *ST);
+
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULANEMASKUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 237929699dd9d..7c37347139607 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -73,6 +73,7 @@
#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPULaneMaskUtils.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -783,17 +784,8 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MachineFunction *MF = &B.getMF();
const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
- const unsigned MovExecOpc =
- Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- const unsigned MovExecTermOpc =
- Subtarget.isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
-
- const unsigned XorTermOpc = Subtarget.isWave32() ?
- AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
- const unsigned AndSaveExecOpc = Subtarget.isWave32() ?
- AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
- const unsigned ExecReg = Subtarget.isWave32() ?
- AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ const AMDGPU::LaneMaskConstants &LMC =
+ AMDGPU::getLaneMaskConstants(&Subtarget);
#ifndef NDEBUG
const int OrigRangeSize = std::distance(Range.begin(), Range.end());
@@ -941,19 +933,19 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MRI.setRegClass(CondReg, WaveRC);
// Update EXEC, save the original EXEC value to VCC.
- B.buildInstr(AndSaveExecOpc)
- .addDef(NewExec)
- .addReg(CondReg, RegState::Kill);
+ B.buildInstr(LMC.AndSaveExecOpc)
+ .addDef(NewExec)
+ .addReg(CondReg, RegState::Kill);
MRI.setSimpleHint(NewExec, CondReg);
B.setInsertPt(*BodyBB, BodyBB->end());
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- B.buildInstr(XorTermOpc)
- .addDef(ExecReg)
- .addReg(ExecReg)
- .addReg(NewExec);
+ B.buildInstr(LMC.XorTermOpc)
+ .addDef(LMC.ExecReg)
+ .addReg(LMC.ExecReg)
+ .addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
// s_cbranch_scc0?
@@ -962,14 +954,12 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
// Save the EXEC mask before the loop.
- BuildMI(MBB, MBB.end(), DL, TII->get(MovExecOpc), SaveExecReg)
- .addReg(ExecReg);
+ BuildMI(MBB, MBB.end(), DL, TII->get(LMC.MovOpc), SaveExecReg)
+ .addReg(LMC.ExecReg);
// Restore the EXEC mask after the loop.
B.setMBB(*RestoreExecBB);
- B.buildInstr(MovExecTermOpc)
- .addDef(ExecReg)
- .addReg(SaveExecReg);
+ B.buildInstr(LMC.MovTermOpc).addDef(LMC.ExecReg).addReg(SaveExecReg);
// Set the insert point after the original instruction, so any new
// instructions will be in the remainder.
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index dc9dd220130ea..f2baf0787bcf4 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -69,6 +69,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPULegalizerInfo.cpp
AMDGPULibCalls.cpp
AMDGPUImageIntrinsicOptimizer.cpp
+ AMDGPULaneMaskUtils.cpp
AMDGPULibFunc.cpp
AMDGPULowerBufferFatPointers.cpp
AMDGPULowerKernelArguments.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index dce4e6f993005..64e8e5e4cc00a 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -66,6 +66,7 @@
#include "SIFixSGPRCopies.h"
#include "AMDGPU.h"
+#include "AMDGPULaneMaskUtils.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -1134,7 +1135,8 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
}
void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
- bool IsWave32 = MF.getSubtarget<GCNSubtarget>().isWave32();
+ const AMDGPU::LaneMaskConstants &LMC =
+ AMDGPU::getLaneMaskConstants(&MF.getSubtarget<GCNSubtarget>());
for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
++I) {
@@ -1148,10 +1150,7 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
Register SCCCopy =
MRI->createVirtualRegister(TRI->getWaveMaskRegClass());
I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
- MI.getDebugLoc(),
- TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64),
- SCCCopy)
+ MI.getDebugLoc(), TII->get(LMC.CSelectOpc), SCCCopy)
.addImm(-1)
.addImm(0);
I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
@@ -1161,14 +1160,12 @@ void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
continue;
}
if (DstReg == AMDGPU::SCC) {
- unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
- Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
I = BuildMI(*MI.getParent(), std::next(MachineBasicBlock::iterator(MI)),
- MI.getDebugLoc(), TII->get(Opcode))
+ MI.getDebugLoc(), TII->get(LMC.AndOpc))
.addReg(Tmp, getDefRegState(true))
.addReg(SrcReg)
- .addReg(Exec);
+ .addReg(LMC.ExecReg);
MI.eraseFromParent();
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 9b348d46fec4f..7895c518103d5 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -8,6 +8,7 @@
#include "SIFrameLowering.h"
#include "AMDGPU.h"
+#include "AMDGPULaneMaskUtils.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
@@ -984,6 +985,7 @@ void SIFrameLowering::emitCSRSpillStores(
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPU::LaneMaskConstants &LMC = AMDGPU::getLaneMaskConstants(&ST);
// Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
// registers. However, save all lanes of callee-saved VGPRs. Due to this, we
@@ -1015,8 +1017,7 @@ void SIFrameLowering::emitCSRSpillStores(
StoreWWMRegisters(WWMScratchRegs);
auto EnableAllLanes = [&]() {
- unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
+ BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
};
if (!WWMCalleeSavedRegs.empty()) {
@@ -1043,8 +1044,7 @@ void SIFrameLowering::emitCSRSpillStores(
TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
} else if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
- unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
+ BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
.addReg(ScratchExecCopy, RegState::Kill);
LiveUnits.addReg(ScratchExecCopy);
}
@@ -1092,6 +1092,7 @@ void SIFrameLowering::emitCSRSpillRestores(
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ const AMDGPU::LaneMaskConstants &LMC = AMDGPU::getLaneMaskConstants(&ST);
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
@@ -1138,16 +1139,14 @@ void SIFrameLowering::emitCSRSpillRestores(
Register OrigExec = Return.getOperand(0).getReg();
if (!WWMScratchRegs.empty()) {
- unsigned XorOpc = ST.isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
- BuildMI(MBB, MBBI, DL, TII->get(XorOpc), TRI.getExec())
+ BuildMI(MBB, MBBI, DL, TII->get(LMC.XorOpc), LMC.ExecReg)
.addReg(OrigExec)
.addImm(-1);
RestoreWWMRegisters(WWMScratchRegs);
}
// Restore original EXEC.
- unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
+ BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addReg(OrigExec);
return;
}
@@ -1159,8 +1158,7 @@ void SIFrameLowering::emitCSRSpillRestores(
RestoreWWMRegisters(WWMScratchRegs);
if (!WWMCalleeSavedRegs.empty()) {
if (ScratchExecCopy) {
- unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
+ BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
} else {
ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false,
@@ -1171,8 +1169,7 @@ void SIFrameLowering::emitCSRSpillRestores(
RestoreWWMRegisters(WWMCalleeSavedRegs);
if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
- unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
+ BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
.addReg(ScratchExecCopy, RegState::Kill);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 561019bb65549..74f348b6771cd 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -14,6 +14,7 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPULaneMaskUtils.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -4809,6 +4810,7 @@ emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
MachineFunction *MF = OrigBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const AMDGPU::LaneMaskConstants &LMC = AMDGPU::getLaneMaskConstants(&ST);
MachineBasicBlock::iterator I = LoopBB.begin();
const TargetRegisterClass *BoolRC = TRI->getBoolRC();
@@ -4840,10 +4842,7 @@ emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
.addReg(Idx.getReg(), 0, Idx.getSubReg());
// Update EXEC, save the original EXEC value to VCC.
- BuildMI(LoopBB, I, DL,
- TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
- : AMDGPU::S_AND_SAVEEXEC_B64),
- NewExec)
+ BuildMI(LoopBB, I, DL, TII->get(LMC.AndSaveExecOpc), NewExec)
.addReg(CondReg, RegState::Kill);
MRI.setSimpleHint(NewExec, CondReg);
@@ -4870,13 +4869,9 @@ emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
}
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
MachineInstr *InsertPt =
- BuildMI(LoopBB, I, DL,
- TII->get(ST.isWave32() ? AMDGPU::S_XOR_B32_term
- : AMDGPU::S_XOR_B64_term),
- Exec)
- .addReg(Exec)
+ BuildMI(LoopBB, I, DL, TII->get(LMC.XorTermOpc), LMC.ExecReg)
+ .addReg(LMC.ExecReg)
.addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
@@ -4911,15 +4906,14 @@ loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI,
Register DstReg = MI.getOperand(0).getReg();
Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
Register TmpExec = MRI.createVirtualRegister(BoolXExecRC);
- unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ const AMDGPU::LaneMaskConstants &LMC = AMDGPU::getLaneMaskConstants(&ST);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
// Save the EXEC mask
// clang-format off
- BuildMI(MBB, I, DL, TII->get(MovExecOpc), SaveExec)
- .addReg(Exec);
+ BuildMI(MBB, I, DL, TII->get(LMC.MovOpc), SaveExec)
+ .addReg(LMC.ExecReg);
// clang-format on
auto [LoopBB, RemainderBB] = splitBlockForLoop(MI, MBB, false);
@@ -4939,7 +4933,7 @@ loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI,
LoopBB->addSuccessor(LandingPad);
MachineBasicBlock::iterator First = LandingPad->begin();
// clang-format off
- BuildMI(*LandingPad, First, DL, TII->get(MovExecOpc), Exec)
+ BuildMI(*LandingPad, First, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
.addReg(SaveExec);
// clang-format on
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index df638bd65bdaa..7800897322d24 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -14,6 +14,7 @@
#include "SIInstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPULaneMaskUtils.h"
#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
@@ -1195,6 +1196,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *BoolXExecRC = RI.getWaveMaskRegClass();
+ const AMDGPU::LaneMaskConstants &LMC = AMDGPU::getLaneMaskConstants(&ST);
assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
"Not a VGPR32 reg");
@@ -1213,10 +1215,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
switch (Cond[0].getImm()) {
case SIInstrInfo::SCC_TRUE: {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64), SReg)
- .addImm(1)
- .addImm(0);
+ BuildMI(MBB, I, DL, get(LMC.CSelectOpc), SReg).addImm(1).addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -1227,10 +1226,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
}
case SIInstrInfo::SCC_FALSE: {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64), SReg)
- .addImm(0)
- .addImm(1);
+ BuildMI(MBB, I, DL, get(LMC.CSelectOpc), SReg).addImm(0).addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -1270,13 +1266,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::EXECNZ: {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
- : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
- .addImm(0);
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64), SReg)
- .addImm(1)
- .addImm(0);
+ BuildMI(MBB, I, DL, get(LMC.OrSaveExecOpc), SReg2).addImm(0);
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/154718
More information about the llvm-commits
mailing list