[llvm] [AMDGPU] CodeGen for GFX12 S_WAIT_* instructions (PR #77438)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 12 06:02:52 PST 2024
================
@@ -354,20 +411,139 @@ class WaitcntBrackets {
int VgprUB = -1;
int SgprUB = -1;
unsigned VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS] = {{0}};
- // Wait cnt scores for every sgpr, only lgkmcnt is relevant.
+ // Wait cnt scores for every sgpr, only DS_CNT (corresponding to LGKMcnt
+ // pre-gfx12) or KM_CNT (gfx12+ only) are relevant.
unsigned SgprScores[SQ_MAX_PGM_SGPRS] = {0};
// Bitmask of the VmemTypes of VMEM instructions that might have a pending
// write to each vgpr.
unsigned char VgprVmemTypes[NUM_ALL_VGPRS] = {0};
};
+// This class abstracts the logic for generating and updating S_WAIT*
+// instructions away from the analysis that determines where they are
+// needed. The set of counters, and of the instructions for waiting on
+// them, underwent a major shift with gfx12; keeping that logic behind
+// this abstraction lets the main analysis stay simpler than it would
+// otherwise have had to become.
+class WaitcntGenerator {
+protected:
+ const GCNSubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ AMDGPU::IsaVersion IV;
+ InstCounterType MaxCounter;
+
+public:
+ WaitcntGenerator() {}
+ WaitcntGenerator(const GCNSubtarget *ST, InstCounterType MaxCounter)
+ : ST(ST), TII(ST->getInstrInfo()),
+ IV(AMDGPU::getIsaVersion(ST->getCPU())), MaxCounter(MaxCounter) {}
+
+ // Edits an existing sequence of wait count instructions according
+ // to an incoming Waitcnt value, which is itself updated to reflect
+ // any new wait count instructions that may still need to be generated
+ // by WaitcntGenerator::createNewWaitcnt(). Returns true if any edits
+ // were made.
+ //
+ // Usually the edit merely updates operands, but it may also delete
+ // instructions if the incoming Wait value indicates they are not
+ // needed. It may also remove existing instructions for which a wait
+ // is needed if it can be determined that it is better to generate new
+ // instructions later, as can happen on gfx12.
+ virtual bool
+ applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
+ MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
+ MachineBasicBlock::instr_iterator It) const = 0;
+
+ // Transforms a soft waitcnt into a normal one.
+ bool promoteSoftWaitCnt(MachineInstr *Waitcnt) const;
+
+ // Generates new wait count instructions according to the value of
+ // Wait, returning true if any new instructions were created.
+ virtual bool createNewWaitcnt(MachineBasicBlock &Block,
+ MachineBasicBlock::instr_iterator It,
+ AMDGPU::Waitcnt Wait) = 0;
+
+ // Returns an array of bit masks which can be used to map values in
+ // WaitEventType to corresponding counter values in InstCounterType.
+ virtual const unsigned *getWaitEventMask() const = 0;
+
+ virtual ~WaitcntGenerator() = default;
+};
+
+class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
+public:
+ WaitcntGeneratorPreGFX12() {}
+ WaitcntGeneratorPreGFX12(const GCNSubtarget *ST)
+ : WaitcntGenerator(ST, NUM_NORMAL_INST_CNTS) {}
+
+ bool
+ applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
+ MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
+ MachineBasicBlock::instr_iterator It) const override;
+
+ bool createNewWaitcnt(MachineBasicBlock &Block,
+ MachineBasicBlock::instr_iterator It,
+ AMDGPU::Waitcnt Wait) override;
+
+ const unsigned *getWaitEventMask() const override {
+ assert(ST);
+
+ static const unsigned WaitEventMaskForInstPreGFX12[NUM_INST_CNTS] = {
----------------
jayfoad wrote:
Right, I'd prefer not to change this in this patch.
https://github.com/llvm/llvm-project/pull/77438
More information about the llvm-commits mailing list