[llvm] [NVPTX] Add missing mbarrier intrinsics (PR #164864)
Durgadoss R via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 24 01:23:29 PDT 2025
================
@@ -1082,6 +1082,160 @@ let Predicates = [hasPTX<70>, hasSM<80>] in {
"mbarrier.pending_count.b64",
[(set i32:$res, (int_nvvm_mbarrier_pending_count i64:$state))]>;
}
+
+class MBAR_UTIL<string op, string scope,
+ string space = "", string sem = "",
+ bit tl = 0, bit parity = 0> {
+ // The mbarrier instructions in PTX ISA are of the general form:
+ // mbarrier.op.semantics.scope.space.b64 arg1, arg2 ...
+ // where:
+ // op -> arrive, expect_tx, complete_tx, arrive.expect_tx etc.
+ // semantics -> acquire, release, relaxed (default depends on the op)
+ // scope -> cta or cluster (default is cta-scope)
+ // space -> shared::cta or shared::cluster (default is shared::cta)
+ //
+ // The 'semantics' and 'scope' go together. If one is specified,
+ // then the other _must_ be specified. For example:
+ // (A) mbarrier.arrive <args> (valid, release and cta are default)
+ // (B) mbarrier.arrive.release.cta <args> (valid, sem/scope mentioned explicitly)
+ // (C) mbarrier.arrive.release <args> (invalid, needs scope)
+ // (D) mbarrier.arrive.cta <args> (invalid, needs order)
+ //
+ // Wherever possible, we prefer form (A) to (B) since it is available
+ // from early PTX versions. In most cases, explicitly specifying the
+ // scope requires a later version of PTX.
+ string _scope_asm = !cond(
+ !eq(scope, "scope_cluster") : "cluster",
+ !eq(scope, "scope_cta") : !if(!empty(sem), "", "cta"),
+ true : scope);
+ string _space_asm = !cond(
+ !eq(space, "space_cta") : "shared",
+ !eq(space, "space_cluster") : "shared::cluster",
+ true : space);
+
+ string _parity = !if(parity, "parity", "");
+ string asm_str = StrJoin<".", ["mbarrier", op, _parity,
+ sem, _scope_asm, _space_asm, "b64"]>.ret;
+
+ string _intr_suffix = StrJoin<"_", [!subst(".", "_", op), _parity,
+ !if(tl, "tl", ""),
+ sem, scope, space]>.ret;
+ string intr_name = "int_nvvm_mbarrier_" # _intr_suffix;
+
+ // Predicate checks:
+ // These are used only for the "test_wait/try_wait" variants as they
+ // have evolved since sm80 and are complex. Predicates for the
+ // remaining instructions are straightforward and applied directly.
----------------
durga4github wrote:
Sure, updated in the latest revision.
Resolving this.
https://github.com/llvm/llvm-project/pull/164864
More information about the llvm-commits
mailing list