[llvm] [NVPTX] Add Volta Load/Store Atomics (.relaxed, .acquire, .release) and Volatile (.mmio/.volatile) support (PR #98022)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 11 15:33:20 PDT 2024


================
@@ -700,6 +700,140 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
   return NVPTX::PTXLdStInstCode::GENERIC;
 }
 
+static unsigned int getCodeMemorySemantic(MemSDNode *N,
+                                          const NVPTXSubtarget *Subtarget) {
+  AtomicOrdering Ordering = N->getSuccessOrdering();
+  auto CodeAddrSpace = getCodeAddrSpace(N);
+
+  bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
+  bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
+
+  // TODO: lowering for SequentiallyConsistent Operations: for now, we error.
+  // TODO: lowering for AcquireRelease Operations: for now, we error.
+  //
+
+  // clang-format off
+
+  // Lowering for non-SequentiallyConsistent Operations
+  //
+  // | Atomic  | Volatile | Statespace                    | Lowering sm_60- | Lowering sm_70+                                      |
+  // |---------|----------|-------------------------------|-----------------|------------------------------------------------------|
+  // | No      | No       | All                           | plain           | .weak                                                |
+  // | No      | Yes      | Generic / Shared / Global [0] | .volatile       | .volatile                                            |
+  // | No      | Yes      | Local / Const / Param         | plain [1]       | .weak [1]                                            |
+  // | Relaxed | No       | Generic / Shared / Global [0] | .volatile       | <atomic sem>                                         |
+  // | Other   | No       | Generic / Shared / Global [0] | Error [2]       | <atomic sem>                                         |
+  // | Yes     | No       | Local / Const / Param         | plain [1]       | .weak [1]                                            |
+  // | Relaxed | Yes      | Generic / Shared [0]          | .volatile       | .volatile                                            |
+  // | Relaxed | Yes      | Global [0]                    | .volatile       | .mmio.relaxed.sys (PTX 8.2+) or .volatile (PTX 8.1-) |
+  // | Relaxed | Yes      | Local / Const / Param         | plain [1]       | .weak [1]                                            |
+  // | Other   | Yes      | Generic / Shared / Global [0] | Error [2]       | <atomic sem> [3]                                     |
+
+  // clang-format on
+
+  // [0]: volatile and atomics are only supported on generic addressing to
+  //      shared or global, or shared, or global.
+  //      MMIO requires generic addressing to global or global, but
+  // (TODO) we only implement it for global.
+
+  // [1]: TODO: this implementation exhibits PTX Undefined Behavior; it
+  //      fails to preserve the side-effects of atomics and volatile
+  //      accesses in LLVM IR to local / const / param, causing
+  //      well-formed LLVM-IR & CUDA C++ programs to be miscompiled
+  //      in sm_70+.
+
+  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL ||
+      CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT ||
+      CodeAddrSpace == NVPTX::PTXLdStInstCode::PARAM) {
+    return NVPTX::PTXLdStInstCode::NotAtomic;
+  }
+
+  // [2]: Atomics with Ordering different than Relaxed are not supported on
+  //      sm_60 and older; this includes volatile atomics.
+  if (!(Ordering == AtomicOrdering::NotAtomic ||
+        Ordering == AtomicOrdering::Monotonic) &&
+      !HasMemoryOrdering) {
+    SmallString<256> Msg;
+    raw_svector_ostream OS(Msg);
+    OS << "PTX does not support \"atomic\" for orderings different than"
+          "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \""
+       << toIRString(Ordering) << "\".";
+    report_fatal_error(OS.str());
+  }
+
+  // [3]: TODO: these should eventually use .mmio<.atomic sem>; for now we drop
+  // the volatile semantics and preserve the atomic ones.
+
+  // PTX volatile and PTX atomics are not available for statespace that differ
+  // from .generic, .global, or .shared. The behavior of PTX volatile and PTX
+  // atomics is undefined if the generic address does not refer to a .global or
+  // .shared memory location.
+  bool AddrGenericOrGlobalOrShared =
+      (CodeAddrSpace == NVPTX::PTXLdStInstCode::GENERIC ||
+       CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL ||
+       CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
+  bool UseRelaxedMMIO =
+      HasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
+
+  switch (Ordering) {
+  case AtomicOrdering::NotAtomic:
+    return N->isVolatile() && AddrGenericOrGlobalOrShared
+               ? NVPTX::PTXLdStInstCode::Volatile
+               : NVPTX::PTXLdStInstCode::NotAtomic;
+  case AtomicOrdering::Monotonic:
+    if (N->isVolatile())
+      return UseRelaxedMMIO                ? NVPTX::PTXLdStInstCode::RelaxedMMIO
+             : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
+                                           : NVPTX::PTXLdStInstCode::NotAtomic;
+    else
+      return HasMemoryOrdering             ? NVPTX::PTXLdStInstCode::Relaxed
+             : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
+                                           : NVPTX::PTXLdStInstCode::NotAtomic;
+  case AtomicOrdering::Acquire:
+    if (!N->readMem()) {
+      SmallString<256> Msg;
+      raw_svector_ostream OS(Msg);
+      OS << "PTX only supports Acquire Ordering on reads: "
+         << N->getOperationName();
+      N->print(OS);
+      report_fatal_error(OS.str());
+    }
+    return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Acquire
+                                       : NVPTX::PTXLdStInstCode::NotAtomic;
+  case AtomicOrdering::Release:
+    if (!N->writeMem()) {
+      SmallString<256> Msg;
+      raw_svector_ostream OS(Msg);
+      OS << "PTX only supports Release Ordering on writes: "
+         << N->getOperationName();
+      N->print(OS);
+      report_fatal_error(OS.str());
+    }
+    return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Release
+                                       : NVPTX::PTXLdStInstCode::NotAtomic;
+  case AtomicOrdering::AcquireRelease: {
+    SmallString<256> Msg;
+    raw_svector_ostream OS(Msg);
+    OS << "PTX only supports AcquireRelease Ordering on read-modify-write: "
+       << N->getOperationName();
+    N->print(OS);
+    report_fatal_error(OS.str());
+  }
+  case AtomicOrdering::SequentiallyConsistent:
+  case AtomicOrdering::Unordered:
+  default: {
+    // TODO: support AcquireRelease and SequentiallyConsistent
+    SmallString<256> Msg;
+    raw_svector_ostream OS(Msg);
+    OS << "NVPTX backend does not support AtomicOrdering \""
+       << toIRString(Ordering) << "\" yet.";
+    report_fatal_error(OS.str());
+  }
+  }
+
+  report_fatal_error("unreachable");
----------------
Artem-B wrote:

If this is something that indicate an issue with the source IR, it should have a more meaningful message.
If that's an error in LLVM code, then it should be `llvm_unreachable()`

https://github.com/llvm/llvm-project/pull/98022


More information about the llvm-commits mailing list