[llvm] 396f677 - [AMDGPU] Remove unused VGPRSingleUseHintInsts feature (#109769)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 24 02:58:04 PDT 2024
Author: Scott Egerton
Date: 2024-09-24T10:58:00+01:00
New Revision: 396f6775143ffa80b9f0e72e7250613092d88124
URL: https://github.com/llvm/llvm-project/commit/396f6775143ffa80b9f0e72e7250613092d88124
DIFF: https://github.com/llvm/llvm-project/commit/396f6775143ffa80b9f0e72e7250613092d88124.diff
LOG: [AMDGPU] Remove unused VGPRSingleUseHintInsts feature (#109769)
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/CMakeLists.txt
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/VOP1Instructions.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/lib/Target/AMDGPU/VOPCInstructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/MC/AMDGPU/gfx11_unsupported.s
llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
Removed:
llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s
llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 4b48b54b18bb99..9e11b13c101d47 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -611,9 +611,7 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
- ``gfx1152``
SALU floating point instructions
- and single-use VGPR hint
- instructions are not available
- on:
+ are not available on:
- ``gfx1150``
- ``gfx1151``
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b2dd354e496a2e..4abb5a63ab6d2c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -405,9 +405,6 @@ extern char &SIModeRegisterID;
void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;
-void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
-extern char &AMDGPUInsertSingleUseVDSTID;
-
void initializeSIInsertHardClausesPass(PassRegistry &);
extern char &SIInsertHardClausesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 919e698e76b33b..3626fd8bc78c15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -929,12 +929,6 @@ def FeatureSALUFloatInsts : SubtargetFeature<"salu-float",
"Has SALU floating point instructions"
>;
-def FeatureVGPRSingleUseHintInsts : SubtargetFeature<"vgpr-singleuse-hint",
- "HasVGPRSingleUseHintInsts",
- "true",
- "Has single-use VGPR hint instructions"
->;
-
def FeaturePseudoScalarTrans : SubtargetFeature<"pseudo-scalar-trans",
"HasPseudoScalarTrans",
"true",
@@ -1615,14 +1609,12 @@ def FeatureISAVersion11_5_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
- FeatureVGPRSingleUseHintInsts,
FeatureRequiredExportPriority])>;
def FeatureISAVersion11_5_1 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
- FeatureVGPRSingleUseHintInsts,
Feature1_5xVGPRs,
FeatureRequiredExportPriority])>;
@@ -1630,7 +1622,6 @@ def FeatureISAVersion11_5_2 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
- FeatureVGPRSingleUseHintInsts,
FeatureRequiredExportPriority])>;
def FeatureISAVersion12 : FeatureSet<
@@ -1663,7 +1654,6 @@ def FeatureISAVersion12 : FeatureSet<
FeatureSALUFloatInsts,
FeaturePseudoScalarTrans,
FeatureHasRestrictedSOffset,
- FeatureVGPRSingleUseHintInsts,
FeatureScalarDwordx3Loads,
FeatureDPPSrc1SGPR,
FeatureMaxHardClauseLength32,
@@ -2271,9 +2261,6 @@ def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
AssemblerPredicate<(all_of FeatureSALUFloatInsts)>;
-def HasVGPRSingleUseHintInsts : Predicate<"Subtarget->hasVGPRSingleUseHintInsts()">,
- AssemblerPredicate<(all_of FeatureVGPRSingleUseHintInsts)>;
-
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
deleted file mode 100644
index 43b3bf43fe56db..00000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
-/// instructions that produce single-use VGPR values. If the value is forwarded
-/// to the consumer instruction prior to VGPR writeback, the hardware can
-/// then skip (kill) the VGPR write.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUGenSearchableTables.inc"
-#include "GCNSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Register.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MCRegister.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
-#include <array>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
-
-namespace {
-class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
-private:
- const SIInstrInfo *SII;
- class SingleUseInstruction {
- private:
- static const unsigned MaxSkipRange = 0b111;
- static const unsigned MaxNumberOfSkipRegions = 2;
-
- unsigned LastEncodedPositionEnd;
- MachineInstr *ProducerInstr;
-
- std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
- SmallVector<unsigned, MaxNumberOfSkipRegions> SkipRegions;
-
- // Adds a skip region into the instruction.
- void skip(const unsigned ProducerPosition) {
- while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
- SkipRegions.push_back(MaxSkipRange);
- LastEncodedPositionEnd += MaxSkipRange;
- }
- SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
- LastEncodedPositionEnd = ProducerPosition;
- }
-
- bool currentRegionHasSpace() {
- const auto Region = SkipRegions.size();
- // The first region has an extra bit of encoding space.
- return SingleUseRegions[Region] <
- ((Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);
- }
-
- unsigned encodeImm() {
- // Handle the first Single Use Region separately as it has an extra bit
- // of encoding space.
- unsigned Imm = SingleUseRegions[SkipRegions.size()];
- unsigned ShiftAmount = 4;
- for (unsigned i = SkipRegions.size(); i > 0; i--) {
- Imm |= SkipRegions[i - 1] << ShiftAmount;
- ShiftAmount += 3;
- Imm |= SingleUseRegions[i - 1] << ShiftAmount;
- ShiftAmount += 3;
- }
- return Imm;
- }
-
- public:
- SingleUseInstruction(const unsigned ProducerPosition,
- MachineInstr *Producer)
- : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
- SingleUseRegions({1, 0, 0}) {}
-
- // Returns false if adding a new single use producer failed. This happens
- // because it could not be encoded, either because there is no room to
- // encode another single use producer region or that this single use
- // producer is too far away to encode the amount of instructions to skip.
- bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
- // Producer is too far away to encode into this instruction or another
- // skip region is needed and SkipRegions.size() = 2 so there's no room for
- // another skip region, therefore a new instruction is needed.
- if (LastEncodedPositionEnd +
- (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
- ProducerPosition)
- return false;
-
- // If a skip region is needed.
- if (LastEncodedPositionEnd != ProducerPosition ||
- !currentRegionHasSpace()) {
- // If the current region is out of space therefore a skip region would
- // be needed, but there is no room for another skip region.
- if (SkipRegions.size() == MaxNumberOfSkipRegions)
- return false;
- skip(ProducerPosition);
- }
-
- SingleUseRegions[SkipRegions.size()]++;
- LastEncodedPositionEnd = ProducerPosition + 1;
- ProducerInstr = MI;
- return true;
- }
-
- auto emit(const SIInstrInfo *SII) {
- return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
- SII->get(AMDGPU::S_SINGLEUSE_VDST))
- .addImm(encodeImm());
- }
- };
-
-public:
- static char ID;
-
- AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
-
- void insertSingleUseInstructions(
- ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
- SmallVector<SingleUseInstruction> Instructions;
-
- for (auto &[Position, MI] : SingleUseProducers) {
- // Encode this position into the last single use instruction if possible.
- if (Instructions.empty() ||
- !Instructions.back().tryAddProducer(Position, MI)) {
- // If not, add a new instruction.
- Instructions.push_back(SingleUseInstruction(Position, MI));
- }
- }
-
- for (auto &Instruction : Instructions)
- Instruction.emit(SII);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- const auto &ST = MF.getSubtarget<GCNSubtarget>();
- if (!ST.hasVGPRSingleUseHintInsts())
- return false;
-
- SII = ST.getInstrInfo();
- const auto *TRI = &SII->getRegisterInfo();
- bool InstructionEmitted = false;
-
- for (MachineBasicBlock &MBB : MF) {
- DenseMap<MCRegUnit, unsigned> RegisterUseCount;
-
- // Handle boundaries at the end of basic block separately to avoid
- // false positives. If they are live at the end of a basic block then
- // assume it has more uses later on.
- for (const auto &Liveout : MBB.liveouts()) {
- for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();
- ++Units) {
- const auto [Unit, Mask] = *Units;
- if ((Mask & Liveout.LaneMask).any())
- RegisterUseCount[Unit] = 2;
- }
- }
-
- SmallVector<std::pair<unsigned, MachineInstr *>>
- SingleUseProducerPositions;
-
- unsigned VALUInstrCount = 0;
- for (MachineInstr &MI : reverse(MBB.instrs())) {
- // All registers in all operands need to be single use for an
- // instruction to be marked as a single use producer.
- bool AllProducerOperandsAreSingleUse = true;
-
- // Gather a list of Registers used before updating use counts to avoid
- // double counting registers that appear multiple times in a single
- // MachineInstr.
- SmallVector<MCRegUnit> RegistersUsed;
-
- for (const auto &Operand : MI.all_defs()) {
- const auto Reg = Operand.getReg();
-
- const auto RegUnits = TRI->regunits(Reg);
- if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
- return RegisterUseCount[Unit] > 1;
- }))
- AllProducerOperandsAreSingleUse = false;
-
- // Reset uses count when a register is no longer live.
- for (const MCRegUnit Unit : RegUnits)
- RegisterUseCount.erase(Unit);
- }
-
- for (const auto &Operand : MI.all_uses()) {
- const auto Reg = Operand.getReg();
-
- // Count the number of times each register is read.
- for (const MCRegUnit Unit : TRI->regunits(Reg)) {
- if (!is_contained(RegistersUsed, Unit))
- RegistersUsed.push_back(Unit);
- }
- }
- for (const MCRegUnit Unit : RegistersUsed)
- RegisterUseCount[Unit]++;
-
- // Do not attempt to optimise across exec mask changes.
- if (MI.modifiesRegister(AMDGPU::EXEC, TRI) ||
- AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) {
- for (auto &UsedReg : RegisterUseCount)
- UsedReg.second = 2;
- }
-
- if (!SIInstrInfo::isVALU(MI) ||
- AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode()))
- continue;
- if (AllProducerOperandsAreSingleUse) {
- SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
- InstructionEmitted = true;
- }
- VALUInstrCount++;
- }
- insertSingleUseInstructions(SingleUseProducerPositions);
- }
- return InstructionEmitted;
- }
-};
-} // namespace
-
-char AMDGPUInsertSingleUseVDST::ID = 0;
-
-char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
-
-INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
- "AMDGPU Insert SingleUseVDST", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fdee0819b502..abd50748f2cc05 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -311,12 +311,6 @@ static cl::opt<bool> EnableSIModeRegisterPass(
cl::init(true),
cl::Hidden);
-// Enable GFX11.5+ s_singleuse_vdst insertion
-static cl::opt<bool>
- EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
- cl::desc("Enable s_singleuse_vdst insertion"),
- cl::init(false), cl::Hidden);
-
// Enable GFX11+ s_delay_alu insertion
static cl::opt<bool>
EnableInsertDelayAlu("amdgpu-enable-delay-alu",
@@ -450,7 +444,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowLegacyPass(*PR);
- initializeAMDGPUInsertSingleUseVDSTPass(*PR);
initializeAMDGPUInsertDelayAluPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
@@ -1518,9 +1511,6 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);
- if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less))
- addPass(&AMDGPUInsertSingleUseVDSTID);
-
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
addPass(&AMDGPUInsertDelayAluID);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index e813653158e5d9..7c883cc2017ddd 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -81,7 +81,6 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMCInstLower.cpp
AMDGPUMemoryUtils.cpp
AMDGPUIGroupLP.cpp
- AMDGPUInsertSingleUseVDST.cpp
AMDGPUMarkLastScratchLoad.cpp
AMDGPUMIRFormatter.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index a4ae8a1be32258..e6b7342d5fffcf 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -215,7 +215,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasPackedTID = false;
bool ScalarizeGlobal = false;
bool HasSALUFloatInsts = false;
- bool HasVGPRSingleUseHintInsts = false;
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;
@@ -1280,8 +1279,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
- bool hasVGPRSingleUseHintInsts() const { return HasVGPRSingleUseHintInsts; }
-
bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; }
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index c016be2fc6c0fb..087ca1f954464d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2409,8 +2409,6 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field bit EnableClamp = _EnableClamp;
field bit IsTrue16 = 0;
field bit IsRealTrue16 = 0;
- field bit IsInvalidSingleUseConsumer = 0;
- field bit IsInvalidSingleUseProducer = 0;
field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 2e73a1a15f6b32..9da27a7c7ee7d6 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1752,11 +1752,6 @@ let OtherPredicates = [HasExportInsts] in
"$simm16">;
} // End SubtargetPredicate = isGFX11Plus
-let SubtargetPredicate = HasVGPRSingleUseHintInsts in {
- def S_SINGLEUSE_VDST :
- SOPP_Pseudo<"s_singleuse_vdst", (ins s16imm:$simm16), "$simm16">;
-} // End SubtargetPredicate = HasVGPRSingeUseHintInsts
-
let SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 in {
def S_WAIT_LOADCNT :
SOPP_Pseudo<"s_wait_loadcnt", (ins s16imm:$simm16), "$simm16",
@@ -2676,12 +2671,6 @@ defm S_ICACHE_INV : SOPP_Real_32_gfx11_gfx12<0x03c>;
defm S_BARRIER : SOPP_Real_32_gfx11<0x03d>;
-//===----------------------------------------------------------------------===//
-// SOPP - GFX1150, GFX12.
-//===----------------------------------------------------------------------===//
-
-defm S_SINGLEUSE_VDST : SOPP_Real_32_gfx11_gfx12<0x013>;
-
//===----------------------------------------------------------------------===//
// SOPP - GFX6, GFX7, GFX8, GFX9, GFX10
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 8b5ec8793d84a2..f32c82f1e4ba4c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -379,12 +379,6 @@ struct VOPTrue16Info {
bool IsTrue16;
};
-struct SingleUseExceptionInfo {
- uint16_t Opcode;
- bool IsInvalidSingleUseConsumer;
- bool IsInvalidSingleUseProducer;
-};
-
struct FP8DstByteSelInfo {
uint16_t Opcode;
bool HasFP8DstByteSel;
@@ -396,8 +390,6 @@ struct FP8DstByteSelInfo {
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
-#define GET_SingleUseExceptionTable_DECL
-#define GET_SingleUseExceptionTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
@@ -626,16 +618,6 @@ bool isTrue16Inst(unsigned Opc) {
return Info ? Info->IsTrue16 : false;
}
-bool isInvalidSingleUseConsumerInst(unsigned Opc) {
- const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
- return Info && Info->IsInvalidSingleUseConsumer;
-}
-
-bool isInvalidSingleUseProducerInst(unsigned Opc) {
- const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
- return Info && Info->IsInvalidSingleUseProducer;
-}
-
bool isFP8DstSelInst(unsigned Opc) {
const FP8DstByteSelInfo *Info = getFP8DstByteSelHelper(Opc);
return Info ? Info->HasFP8DstByteSel : false;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 35c080d8e0bebc..da37534f2fa4ff 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -870,6 +870,8 @@ bool isInvalidSingleUseConsumerInst(unsigned Opc);
LLVM_READONLY
bool isInvalidSingleUseProducerInst(unsigned Opc);
+bool isDPMACCInstruction(unsigned Opc);
+
LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 33f2f9f1f5c5b9..bd805059705783 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -252,7 +252,6 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped, untyped]> {
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE,
[], 1> {
let isConvergent = 1;
- let IsInvalidSingleUseConsumer = 1;
}
foreach vt = Reg32Types.types in {
@@ -375,7 +374,6 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC32 = VRegSrc_32;
let Src0RC64 = VRegSrc_32;
- let IsInvalidSingleUseConsumer = 1;
}
// Special case because there are no true output operands. Hack vdst
@@ -419,12 +417,8 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let EmitDst = 1; // force vdst emission
}
-let IsInvalidSingleUseProducer = 1 in {
- def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
- def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32> {
- let IsInvalidSingleUseConsumer = 1;
- }
-}
+def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
+def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;
let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
@@ -541,7 +535,6 @@ let SubtargetPredicate = isGFX9Plus in {
let Constraints = "$vdst = $src1, $vdst1 = $src0";
let DisableEncoding = "$vdst1,$src1";
let SchedRW = [Write64Bit, Write64Bit];
- let IsInvalidSingleUseConsumer = 1;
}
let isReMaterializable = 1 in
@@ -708,8 +701,6 @@ let SubtargetPredicate = isGFX10Plus in {
let Constraints = "$vdst = $src1, $vdst1 = $src0";
let DisableEncoding = "$vdst1,$src1";
let SchedRW = [Write64Bit, Write64Bit];
- let IsInvalidSingleUseConsumer = 1;
- let IsInvalidSingleUseProducer = 1;
}
} // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus
@@ -743,10 +734,7 @@ let SubtargetPredicate = isGFX11Plus in {
}
// Restrict src0 to be VGPR
def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
- [], /*VOP1Only=*/ 1> {
- let IsInvalidSingleUseConsumer = 1;
- let IsInvalidSingleUseProducer = 1;
- }
+ [], /*VOP1Only=*/ 1>;
defm V_MOV_B16 : VOP1Inst_t16<"v_mov_b16", VOP_I16_I16>;
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index dd48607402eb0b..52f7be3b4577df 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -788,12 +788,10 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag,
} // End isCommutable = 1
// These are special and do not read the exec mask.
-let isConvergent = 1, Uses = []<Register>, IsInvalidSingleUseConsumer = 1 in {
+let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, []>;
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
-def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []> {
- let IsInvalidSingleUseProducer = 1;
- }
+def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []>;
} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 466114b95f9f90..20beb41b7b58bb 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -157,12 +157,12 @@ defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_l
} // End SubtargetPredicate = isNotGFX12Plus
} // End SchedRW = [WriteDoubleAdd]
-let SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1 in {
+let SchedRW = [WriteIntMul] in {
defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF<VOP_I32_I32_I32>, mulhu>;
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
-} // End SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1
+} // End SchedRW = [WriteIntMul]
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
@@ -260,9 +260,9 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d
let isReMaterializable = 1 in
defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-let Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1 in {
+let Constraints = "@earlyclobber $vdst" in {
defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
-} // End Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1
+} // End Constraints = "@earlyclobber $vdst"
let isReMaterializable = 1 in {
@@ -277,16 +277,14 @@ let SchedRW = [Write64Bit] in {
defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, csra_64>;
} // End SubtargetPredicate = isGFX6GFX7
- let IsInvalidSingleUseConsumer = 1 in {
let SubtargetPredicate = isGFX8Plus in {
defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshr_rev_64>;
defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, cashr_rev_64>;
- } // End SubtargetPredicate = isGFX8Plus, , IsInvalidSingleUseConsumer = 1
+ } // End SubtargetPredicate = isGFX8Plus
let SubtargetPredicate = isGFX8GFX9GFX10GFX11 in {
defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
} // End SubtargetPredicate = isGFX8GFX9GFX10GFX11
- } // End IsInvalidSingleUseConsumer = 1
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
@@ -311,14 +309,14 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
let HasModifiers = 0;
}
-let SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1 in {
+let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
-} // End SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1
+} // End SubtargetPredicate = isGFX7Plus
-let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1 in {
+let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in {
defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
@@ -328,7 +326,7 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseCons
defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
}
-} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1
+} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU]
let FPDPRounding = 1 in {
@@ -865,10 +863,10 @@ let SubtargetPredicate = isGFX10Plus in {
} // End isCommutable = 1, isReMaterializable = 1
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
- let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in", IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 in {
+ let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>;
defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
- } // End $vdst = $vdst_in, DisableEncoding $vdst_in, IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1
+ } // End $vdst = $vdst_in, DisableEncoding $vdst_in
foreach vt = Reg32Types.types in {
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
@@ -1286,12 +1284,11 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
-let IsInvalidSingleUseConsumer = 1 in {
- defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
- let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1 in {
- defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>;
- } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32: $src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1
-} // End IsInvalidSingleUseConsumer = 1
+defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
+
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
+ defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>;
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
let SubtargetPredicate = isGFX10Before1030 in {
defm V_MUL_LO_I32 : VOP3_Real_gfx10<0x16b>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index f4d2c29158f49f..5eee71887964ad 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -382,19 +382,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
AMDGPUfdot2, 1/*ExplicitClamp*/>;
let OtherPredicates = [HasDot7Insts] in {
-let IsInvalidSingleUseConsumer = 1 in {
- defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
-}
+defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End OtherPredicates = [HasDot7Insts]
let OtherPredicates = [HasDot1Insts] in {
-let IsInvalidSingleUseConsumer = 1 in {
- defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
-}
+defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
} // End OtherPredicates = [HasDot1Insts]
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index be862b44917e15..d6e08dce130ced 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -464,10 +464,9 @@ multiclass VOPC_I16 <string opName, SDPatternOperator cond = COND_NULL,
multiclass VOPC_I32 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_I32_I32, cond, revOp, 0>;
-let IsInvalidSingleUseConsumer = 1 in {
- multiclass VOPC_I64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
-}
+multiclass VOPC_I64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
+
multiclass VOPCX_F16<string opName, string revOp = opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
@@ -502,10 +501,8 @@ multiclass VOPCX_I16<string opName, string revOp = opName> {
multiclass VOPCX_I32 <string opName, string revOp = opName> :
VOPCX_Pseudos <opName, VOPC_I1_I32_I32, VOPC_I32_I32, COND_NULL, revOp>;
-let IsInvalidSingleUseConsumer = 1 in {
- multiclass VOPCX_I64 <string opName, string revOp = opName> :
- VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
-}
+multiclass VOPCX_I64 <string opName, string revOp = opName> :
+ VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 5a460ef0d42320..05a7d907d237ae 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -17,8 +17,6 @@ class LetDummies {
bit isReMaterializable;
bit isAsCheapAsAMove;
bit FPDPRounding;
- bit IsInvalidSingleUseConsumer;
- bit IsInvalidSingleUseProducer;
Predicate SubtargetPredicate;
string Constraints;
string DisableEncoding;
@@ -67,8 +65,6 @@ class VOP_Pseudo <string opName, string suffix, VOPProfile P, dag outs, dag ins,
string Mnemonic = opName;
Instruction Opcode = !cast<Instruction>(NAME);
bit IsTrue16 = P.IsTrue16;
- bit IsInvalidSingleUseConsumer = P.IsInvalidSingleUseConsumer;
- bit IsInvalidSingleUseProducer = P.IsInvalidSingleUseProducer;
VOPProfile Pfl = P;
string AsmOperands;
@@ -165,8 +161,6 @@ class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
class VOP_Real<VOP_Pseudo ps> {
Instruction Opcode = !cast<Instruction>(NAME);
bit IsSingle = ps.Pfl.IsSingle;
- bit IsInvalidSingleUseConsumer = ps.Pfl.IsInvalidSingleUseConsumer;
- bit IsInvalidSingleUseProducer = ps.Pfl.IsInvalidSingleUseProducer;
}
class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemonic> :
@@ -844,9 +838,6 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
let DecoderNamespace = "GFX8";
-
- let IsInvalidSingleUseConsumer = !not(VINTERP);
- let IsInvalidSingleUseProducer = !not(VINTERP);
}
class VOP3_DPP_Pseudo <string OpName, VOPProfile P> :
@@ -1714,13 +1705,4 @@ def VOPTrue16Table : GenericTable {
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getTrue16OpcodeHelper";
-}
-
-def SingleUseExceptionTable : GenericTable {
- let FilterClass = "VOP_Pseudo";
- let CppTypeName = "SingleUseExceptionInfo";
- let Fields = ["Opcode", "IsInvalidSingleUseConsumer", "IsInvalidSingleUseProducer"];
-
- let PrimaryKey = ["Opcode"];
- let PrimaryKeyName = "getSingleUseExceptionHelper";
-}
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
deleted file mode 100644
index 9e65ce329df431..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
+++ /dev/null
@@ -1,1420 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
-
-# One single-use producer.
----
-name: one_producer
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: one_producer
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- bb.1:
- liveins: $vgpr0, $vgpr2
-...
-
-# One single-use producer of a 64-bit value.
----
-name: one_producer_64bit
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: one_producer_64bit
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0_vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr4_vgpr5
- bb.0:
- liveins: $vgpr0_vgpr1
- $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
- $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec
- bb.1:
- liveins: $vgpr4_vgpr5
-...
-
-# Two consecutive single-use producers.
----
-name: two_producers
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: two_producers
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 2
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr3
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- bb.1:
- liveins: $vgpr0, $vgpr3
-...
-
-# Redefinitions of v0.
----
-name: redefinitions
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: redefinitions
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 4
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- bb.0:
- liveins: $vgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
-...
-
-# One producer with no consumers.
----
-name: no_consumer
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: no_consumer
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- bb.1:
-...
-
-# One consumer with two uses of the same value.
----
-name: one_consumer_two_uses
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: one_consumer_two_uses
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
- bb.1:
- liveins: $vgpr0, $vgpr2
-...
-
-# A longer example.
----
-name: longer_example
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: longer_example
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 274
- ; CHECK-NEXT: $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
- ; CHECK-NEXT: $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
- ; CHECK-NEXT: $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
- ; CHECK-NEXT: $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
- ; CHECK-NEXT: $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
- ; CHECK-NEXT: $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
- ; CHECK-NEXT: $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr16, $vgpr18
- bb.0:
- liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
- $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
- $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
- $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
- $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
- $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
- $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
- $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
- $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
- $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
- $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
- bb.1:
- liveins: $vgpr16, $vgpr18
-...
-
-# Multiple uses of v0.
----
-name: multiple_uses_1
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: multiple_uses_1
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1, $vgpr2
-...
-
-# Multiple uses of v0 and redefinitions of v1 and v2.
----
-name: multiple_uses_2
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: multiple_uses_2
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 2
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1, $vgpr2
-...
-
-# Multiple uses of all but v1.
----
-name: multiple_uses_3
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: multiple_uses_3
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr2, $vgpr3
- bb.0:
- liveins: $vgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
- bb.1:
- liveins: $vgpr2, $vgpr3
-...
-
-# Second use is an instruction that reads and writes v1.
----
-name: multiple_uses_4
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: multiple_uses_4
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
- $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- bb.1:
- liveins: $vgpr0, $vgpr1, $vgpr2
-...
-
-# Results are live-in to another basic block.
----
-name: basic_block_1
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: basic_block_1
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: liveins: $vgpr1, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr0, $vgpr1, $vgpr2
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.2:
- liveins: $vgpr1, $vgpr2
-...
-
-# Result v2 has multiple uses in another basic block.
----
-name: basic_block_2
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: basic_block_2
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: liveins: $vgpr3
- bb.0:
- liveins: $vgpr0, $vgpr1
- $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
- bb.1:
- liveins: $vgpr2
- $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
- bb.2:
- liveins: $vgpr3
-...
-
-# Results are redefined in another basic block.
----
-name: basic_block_3
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: basic_block_3
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr0, $vgpr1
- $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
- bb.2:
- liveins: $vgpr0, $vgpr1, $vgpr2
-...
-
-# Exec modified between producer and consumer.
----
-name: exec_mask
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: exec_mask
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr0_sgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0
- bb.0:
- liveins: $sgpr0_sgpr1
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $exec = COPY $sgpr0_sgpr1
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr0
-...
-
-# Exec_lo modified between producer and consumer.
----
-name: exec_mask_lo
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: exec_mask_lo
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: $exec_lo = COPY $sgpr0
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0
- bb.0:
- liveins: $sgpr0
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $exec_lo = COPY $sgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr0
-...
-
-# Exec_hi modified between producer and consumer.
----
-name: exec_mask_hi
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: exec_mask_hi
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: $exec_hi = COPY $sgpr0
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0
- bb.0:
- liveins: $sgpr0
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $exec_hi = COPY $sgpr0
- $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr0
-...
-
-# Write 32-bit vgpr and then read from low 16 bits.
----
-name: write_full_read_lo
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_full_read_lo
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1_lo16
- bb.0:
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
- bb.1:
- liveins: $vgpr1_lo16
-...
-
-# Write 32-bit vgpr and then read from high 16 bits.
----
-name: write_full_read_hi
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_full_read_hi
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1_hi16
- bb.0:
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
- bb.1:
- liveins: $vgpr1_hi16
-...
-
-# Write 32-bit vgpr and then read from both halves.
----
-name: write_full_read_both
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_full_read_both
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
- ; CHECK-NEXT: $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1
- bb.0:
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
- $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
- bb.1:
- liveins: $vgpr1
-...
-
-# Write 32-bit vgpr and then read from both halves in the same instruction.
----
-name: write_full_read_both_same_instruction
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_full_read_both_same_instruction
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1_lo16
- bb.0:
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec
- bb.1:
- liveins: $vgpr1_lo16
-...
-
-# Write low 16-bits and then read 32-bit vgpr.
----
-name: write_lo_read_full
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_lo_read_full
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1
- bb.0:
- liveins: $vgpr0
- $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1
-...
-
-# Write low 16-bits and then read 32-bit vgpr twice.
----
-name: write_lo_read_full_twice
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_lo_read_full_twice
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1, $vgpr2
-...
-
-# Write high 16-bits and then read 32-bit vgpr.
----
-name: write_hi_read_full
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_hi_read_full
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1
- bb.0:
- liveins: $vgpr0
- $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1
-...
-
-# Write high 16-bits and then read 32-bit vgpr twice.
----
-name: write_hi_read_full_twice
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_hi_read_full_twice
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1, $vgpr2
- bb.0:
- liveins: $vgpr0
- $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1, $vgpr2
-...
-
-# Write low 16-bits and then write high 16-bits and then read 32-bit vgpr.
----
-name: write_both_read_full
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_both_read_full
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 2
- ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1
- bb.0:
- $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1
-...
-
-# Write low 16-bits and then write high 16-bits and then read 32-bit vgpr twice.
----
-name: write_both_read_full_twice
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: write_both_read_full_twice
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1, $vgpr2
- bb.0:
- $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1, $vgpr2
-...
-
-# Three single use producer instructions with non single use producer
-# instructions in between.
----
-name: three_producers_with_two_skips
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: three_producers_with_two_skips
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 9361
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr2, $vgpr4
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr2, $vgpr4
-...
-
-# Six single use producer instructions with non single use producer
-# instructions in between.
----
-name: six_producers_with_four_skips
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: six_producers_with_four_skips
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 145
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 9362
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr2, $vgpr4, $vgpr7, $vgpr9
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr2, $vgpr4, $vgpr7, $vgpr9
-...
-
-# Five single use producer instructions, followed by
-# four non single use producers, followed by
-# three single use producer instructions, followed by
-# two non single use producers, followed by
-# one single use producer instructions.
----
-name: immediate_order
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: immediate_order
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 10693
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr13, $vgpr14
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr13, $vgpr14
-...
-
-# Maximum number of single use producers that can be encoded in a single
-# instruction.
----
-name: maximum_producers_single_instruction
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: maximum_producers_single_instruction
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 58255
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
-...
-
-# One more than the maximum number of single use producers that can be encoded
-# in a single instruction.
----
-name: too_many_producers_single_instruction
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: too_many_producers_single_instruction
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 58255
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
-
-
-
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
-...
-
-# Maximum distance between single use producers that can be encoded in a single
-# instruction.
----
-name: maximum_skips_single_instruction
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: maximum_skips_single_instruction
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 15473
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
-...
-
-# One more than the maximum distance between single use producers that can be
-# encoded in a single instruction.
----
-name: too_many_skips_single_instruction
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: too_many_skips_single_instruction
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16
-...
-
-
-# Maximum possible encoding value with all bits of the immediate set
----
-name: all_immediate_bits_set
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: all_immediate_bits_set
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 65535
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr31 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr32 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr33 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr34 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr35 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr36 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr37 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr38 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr39 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr41 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr42 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr43 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr31 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr32 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr33 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr34 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr35 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr36 = V_MOV_B32_e32 $vgpr0, implicit $exec
-
- $vgpr37 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr38 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr39 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr40 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr41 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr42 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr43 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36
-
-...
-
-# Tests for multi-cycle instructions that are explicitly excluded.
-
-# Valid producers but invalid consumer opcodes.
----
-name: v_mul_hi_u32_e64
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: v_mul_hi_u32_e64
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr3
- bb.0:
- liveins: $vgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec
- $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
- bb.1:
- liveins: $vgpr0, $vgpr3
-...
-
----
-name: v_cmpx_t_u64_e64
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: v_cmpx_t_u64_e64
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0
- bb.0:
- liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec
- S_BRANCH %bb.1
- bb.1:
- liveins: $vgpr0
-...
-
----
-name: v_lshlrev_b64_e64
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: v_lshlrev_b64_e64
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0_vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr4_vgpr5
- bb.0:
- liveins: $vgpr0_vgpr1
- $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec
- $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
- $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec
- bb.1:
- liveins: $vgpr4_vgpr5
-...
-
-# Invalid producers but valid consumer opcodes.
----
-name: v_movereld_b32_e32
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: v_movereld_b32_e32
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $m0 = S_MOV_B32 0
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4)
- ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr3
- bb.0:
- liveins: $vgpr0, $vgpr2
- $m0 = S_MOV_B32 0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4)
- $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec
- bb.1:
- liveins: $vgpr3
-...
-
-# Invalid producers and invalid consumer opcodes.
----
-name: v_writelane_b32
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: v_writelane_b32
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0
- bb.0:
- liveins: $vgpr0, $sgpr0
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1
- $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
- bb.1:
- liveins: $vgpr0
-...
-
-# DPP instructions cannot be single use producers or consumers
----
-name: V_ADD_NC_U32_dpp
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: V_ADD_NC_U32_dpp
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vcc
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
- ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
- ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0
- bb.0:
- liveins: $vgpr0, $vcc
- $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
- $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
- $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
- bb.1:
- liveins: $vgpr0
-...
-
-# Exception to the rule that dpp instructions
-# cannot be single use producers or consumers
----
-name: V_INTERP_MOV_F32
-tracksRegLiveness: true
-body: |
- ; CHECK-LABEL: name: V_INTERP_MOV_F32
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
- ; CHECK-NEXT: $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr1
- bb.0:
- $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec
- $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
- bb.1:
- liveins: $vgpr1
-...
-
diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s
deleted file mode 100644
index 044ce48c267846..00000000000000
--- a/llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1150 -show-encoding %s | FileCheck --check-prefixes=GFX1150 %s
-
-s_singleuse_vdst 0x0000
-// GFX1150: encoding: [0x00,0x00,0x93,0xbf]
-
-s_singleuse_vdst 0xffff
-// GFX1150: encoding: [0xff,0xff,0x93,0xbf]
-
-s_singleuse_vdst 0x1234
-// GFX1150: encoding: [0x34,0x12,0x93,0xbf]
diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported.s b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
index c9756a068890e7..c565801d275bb8 100644
--- a/llvm/test/MC/AMDGPU/gfx11_unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx11_unsupported.s
@@ -2014,9 +2014,6 @@ s_cmp_neq_f16 s1, s2
s_cmp_nlt_f16 s1, s2
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
-s_singleuse_vdst 0x1234
-// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
-
buffer_atomic_sub_clamp_u32 v5, off, s[8:11], s3 offset:0 glc
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
index e98659208d5a9c..fdcabc4352c69b 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
@@ -69,15 +69,6 @@ s_wait_alu depctr_va_sdst(3)
s_wait_alu depctr_va_vdst(14) depctr_va_sdst(6) depctr_vm_vsrc(6)
// GFX12: encoding: [0x9b,0xed,0x88,0xbf]
-s_singleuse_vdst 0x0000
-// GFX12: encoding: [0x00,0x00,0x93,0xbf]
-
-s_singleuse_vdst 0xffff
-// GFX12: encoding: [0xff,0xff,0x93,0xbf]
-
-s_singleuse_vdst 0x1234
-// GFX12: encoding: [0x34,0x12,0x93,0xbf]
-
s_barrier_wait 0xffff
// GFX12: encoding: [0xff,0xff,0x94,0xbf]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
index d6e8b7ee2f01f0..f819a61949b577 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
@@ -1,16 +1,11 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GCN-ERR %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=W32 %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=W64 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s
# GCN-ERR: [[@LINE+1]]:1: warning: invalid instruction encoding
0xdf,0x00,0x00,0x02
-# this is s_singleuse_vdst 0x1234, which is only valid on gfx1150
-# GFX11-ERR: [[@LINE+1]]:1: warning: invalid instruction encoding
-0x34,0x12,0x93,0xbf
-
# this is s_waitcnt_vscnt exec_hi, 0x1234, which is valid on gfx11, but not on gfx12
# GFX12-ERR: [[@LINE+1]]:1: warning: invalid instruction encoding
0x34,0x12,0x7f,0xbc
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt
deleted file mode 100644
index 8fa266a73ff87f..00000000000000
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1150 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1150 %s
-
-# GFX1150: s_singleuse_vdst 0x0 ; encoding: [0x00,0x00,0x93,0xbf]
-0x00,0x00,0x93,0xbf
-
-# GFX1150: s_singleuse_vdst 0xffff ; encoding: [0xff,0xff,0x93,0xbf]
-0xff,0xff,0x93,0xbf
-
-# GFX1150: s_singleuse_vdst 0x1234 ; encoding: [0x34,0x12,0x93,0xbf]
-0x34,0x12,0x93,0xbf
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
index d42f920aa61dd7..d69801512c0786 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
@@ -60,14 +60,6 @@
# GFX12: s_wait_storecnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc9,0xbf]
0xd1,0xc1,0xc9,0xbf
-# GFX12: s_singleuse_vdst 0x0 ; encoding: [0x00,0x00,0x93,0xbf]
-0x00,0x00,0x93,0xbf
-
-# GFX12: s_singleuse_vdst 0xffff ; encoding: [0xff,0xff,0x93,0xbf]
-0xff,0xff,0x93,0xbf
-
-# GFX12: s_singleuse_vdst 0x1234 ; encoding: [0x34,0x12,0x93,0xbf]
-0x34,0x12,0x93,0xbf
# GFX12: s_barrier_wait 0xffff ; encoding: [0xff,0xff,0x94,0xbf]
0xff,0xff,0x94,0xbf
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
index dd4af4e98832f7..f83efbd3558025 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
@@ -152,7 +152,6 @@ static_library("LLVMAMDGPUCodeGen") {
"AMDGPUISelLowering.cpp",
"AMDGPUImageIntrinsicOptimizer.cpp",
"AMDGPUInsertDelayAlu.cpp",
- "AMDGPUInsertSingleUseVDST.cpp",
"AMDGPUInstCombineIntrinsic.cpp",
"AMDGPUInstrInfo.cpp",
"AMDGPUInstructionSelector.cpp",
More information about the llvm-commits
mailing list