[clang] [llvm] [AMDGPU] Extend __builtin_amdgcn_sched_group_barrier to support rules. (PR #85304)

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Wed May 8 09:22:10 PDT 2024


================
@@ -2658,21 +2676,102 @@ IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
   return InvertedMask;
 }
 
+void IGroupLPDAGMutation::addSchedGroupBarrierRules() {
+
+  /// Whether or not the instruction has no true data predecessors
+  /// with opcode \p Opc.
+  class NoOpcDataPred : public InstructionRule {
+  protected:
+    unsigned Opc;
+
+  public:
+    bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+               SmallVectorImpl<SchedGroup> &SyncPipe) override {
+      return !std::any_of(
+          SU->Preds.begin(), SU->Preds.end(), [this](const SDep &Pred) {
+            return Pred.getKind() == SDep::Data &&
+                   Pred.getSUnit()->getInstr()->getOpcode() == Opc;
+          });
+    }
+
+    NoOpcDataPred(unsigned Opc, const SIInstrInfo *TII, unsigned SGID,
+                  bool NeedsCache = false)
+        : InstructionRule(TII, SGID, NeedsCache), Opc(Opc) {}
+  };
+
+  /// Whether or not the instruction has no write after read predecessors
+  /// with opcode \p Opc.
+  class NoOpcWARPred final : public InstructionRule {
+  protected:
+    unsigned Opc;
+
+  public:
+    bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+               SmallVectorImpl<SchedGroup> &SyncPipe) override {
+      return !std::any_of(
+          SU->Preds.begin(), SU->Preds.end(), [this](const SDep &Pred) {
+            return Pred.getKind() == SDep::Anti &&
+                   Pred.getSUnit()->getInstr()->getOpcode() == Opc;
+          });
+    }
+    NoOpcWARPred(unsigned Opc, const SIInstrInfo *TII, unsigned SGID,
+                 bool NeedsCache = false)
+        : InstructionRule(TII, SGID, NeedsCache), Opc(Opc){};
+  };
+
+  SchedGroupBarrierRuleCallBacks = {
+      [](unsigned SGID, const SIInstrInfo *TII) {
+        return std::make_shared<NoOpcWARPred>(AMDGPU::V_CNDMASK_B32_e64, TII,
----------------
arsenm wrote:

There's basically no reason to ever use shared_ptr, something is wrong if it's necessary over unique_ptr 

https://github.com/llvm/llvm-project/pull/85304


More information about the cfe-commits mailing list