[llvm] [NVPTX] Add Volta Load/Store Atomics (.relaxed, .acquire, .release) and Volatile (.mmio/.volatile) support (PR #98022)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 8 06:38:36 PDT 2024
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 414c74149c0085e3c11496af171217d5317481e1 7eb311d7b0d5db61b1693f5fab7ade1ded000746 -- llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp llvm/lib/Target/NVPTX/NVPTX.h llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
``````````
</details>
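For reference, the suggested fixes can also be applied in place rather than only viewed. One common way (an assumption about your setup: run it from the root of the llvm-project checkout with the PR branch checked out) is to pipe the same diff into `git apply`:

```bash
# Regenerate the clang-format diff for the two commits and apply it
# directly to the working tree.
git-clang-format --diff 414c74149c0085e3c11496af171217d5317481e1 7eb311d7b0d5db61b1693f5fab7ade1ded000746 -- \
    llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp \
    llvm/lib/Target/NVPTX/NVPTX.h \
    llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | git apply
```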
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index c96e5a8878..be0ca3b6e6 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -227,26 +227,27 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
if (!strcmp(Modifier, "sem")) {
switch (Imm) {
case NVPTX::PTXLdStInstCode::NotAtomic:
- break;
+ break;
case NVPTX::PTXLdStInstCode::Volatile:
- O << ".volatile";
- break;
+ O << ".volatile";
+ break;
case NVPTX::PTXLdStInstCode::Relaxed:
- O << ".relaxed.sys";
- break;
+ O << ".relaxed.sys";
+ break;
case NVPTX::PTXLdStInstCode::Acquire:
- O << ".acquire.sys";
- break;
+ O << ".acquire.sys";
+ break;
case NVPTX::PTXLdStInstCode::Release:
- O << ".release.sys";
- break;
+ O << ".release.sys";
+ break;
case NVPTX::PTXLdStInstCode::RelaxedMMIO:
- O << ".mmio.relaxed.sys";
- break;
+ O << ".mmio.relaxed.sys";
+ break;
default:
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
- OS << "NVPTX LdStCode Printer does not support \"" << Imm << "\" sem modifier.";
+ OS << "NVPTX LdStCode Printer does not support \"" << Imm
+ << "\" sem modifier.";
report_fatal_error(OS.str());
break;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 8c25c79db4..148ae15855 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -700,104 +700,149 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
-static unsigned int getCodeMemorySemantic(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
+static unsigned int getCodeMemorySemantic(MemSDNode *N,
+ const NVPTXSubtarget *Subtarget) {
AtomicOrdering Ordering = N->getSuccessOrdering();
auto CodeAddrSpace = getCodeAddrSpace(N);
// Supports relaxed, acquire, release, weak:
- bool hasAtomics = Subtarget->getPTXVersion() >= 60 && Subtarget->getSmVersion() >= 70;
+ bool hasAtomics =
+ Subtarget->getPTXVersion() >= 60 && Subtarget->getSmVersion() >= 70;
// Supports mmio:
- bool hasRelaxedMMIO = Subtarget->getPTXVersion() >= 82 && Subtarget->getSmVersion() >= 70;
+ bool hasRelaxedMMIO =
+ Subtarget->getPTXVersion() >= 82 && Subtarget->getSmVersion() >= 70;
// TODO: lowering for SequentiallyConsistent Operations: for now, we error.
// TODO: lowering for AcquireRelease Operations: for now, we error.
//
// Lowering for non-SequentiallyConsistent Operations
- //
- // | Atomic | Volatile | Statespace | Lowering sm_60- | Lowering sm_70+ |
+ //
+ // | Atomic | Volatile | Statespace | Lowering sm_60- |
+ // Lowering sm_70+ |
// |---------|----------|-------------------------------|-----------------|------------------------------------------------------|
- // | No | No | All | plain | .weak |
- // | No | Yes | Generic / Shared / Global [0] | .volatile | .volatile |
- // | No | Yes | Local / Const / Param | plain [1] | .weak [1] |
- // | Relaxed | No | Generic / Shared / Global [0] | .volatile | <atomic sem> |
- // | Other | No | Generic / Shared / Global [0] | Error [2] | <atomic sem> |
- // | Yes | No | Local / Const / Param | plain [1] | .weak [1] |
- // | Relaxed | Yes | Generic / Shared [0] | .volatile | .volatile |
- // | Relaxed | Yes | Global [0] | .volatile | .mmio.relaxed.sys (PTX 8.2+) or .volatile (PTX 8.1-) |
- // | Relaxed | Yes | Local / Const / Param | plain [1] | .weak [1] |
- // | Other | Yes | Generic / Shared / Global [0] | Error [4] | <atomic sem> [3] |
+ // | No | No | All | plain |
+ // .weak | | No | Yes |
+ // Generic / Shared / Global [0] | .volatile | .volatile | | No |
+ // Yes | Local / Const / Param | plain [1] | .weak [1] | |
+ // Relaxed | No | Generic / Shared / Global [0] | .volatile |
+ // <atomic sem> | | Other | No |
+ // Generic / Shared / Global [0] | Error [2] | <atomic sem> | | Yes | No
+ // | Local / Const / Param | plain [1] | .weak [1] | | Relaxed |
+ // Yes | Generic / Shared [0] | .volatile | .volatile | |
+ // Relaxed | Yes | Global [0] | .volatile |
+ // .mmio.relaxed.sys (PTX 8.2+) or .volatile (PTX 8.1-) | | Relaxed | Yes |
+ // Local / Const / Param | plain [1] | .weak [1] | | Other |
+ // Yes | Generic / Shared / Global [0] | Error [4] | <atomic sem>
+ // [3] |
//
- // [0]: volatile and atomics are only supported on generic addressing to shared or global, or shared, or global.
- // MMIO requires generic addressing to global or global, but (TODO) we only implement it for global.
- // [1]: TODO: this implementation exhibits PTX Undefined Behavior; it fails to preserve the side-effects of atomics and volatile
- // accesses in LLVM IR to local / const / param, causing well-formed LLVM-IR & CUDA C++ programs to be miscompiled in sm_70+.
- if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL || CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT || CodeAddrSpace == NVPTX::PTXLdStInstCode::PARAM) {
+ // [0]: volatile and atomics are only supported on generic addressing to
+ // shared or global, or shared, or global.
+ // MMIO requires generic addressing to global or global, but (TODO) we
+ // only implement it for global.
+ // [1]: TODO: this implementation exhibits PTX Undefined Behavior; it fails to
+ // preserve the side-effects of atomics and volatile
+ // accesses in LLVM IR to local / const / param, causing well-formed
+ // LLVM-IR & CUDA C++ programs to be miscompiled in sm_70+.
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::PARAM) {
return NVPTX::PTXLdStInstCode::NotAtomic;
}
- // [2]: Atomics with Ordering different than Relaxed are not supported on sm_60 and older.
- if (!(Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Monotonic) && !hasAtomics) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX does not support \"atomic\" for orderings different than \"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \"" << toIRString(Ordering) << "\".";
- report_fatal_error(OS.str());
+ // [2]: Atomics with Ordering different than Relaxed are not supported on
+ // sm_60 and older.
+ if (!(Ordering == AtomicOrdering::NotAtomic ||
+ Ordering == AtomicOrdering::Monotonic) &&
+ !hasAtomics) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX does not support \"atomic\" for orderings different than "
+ "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \""
+ << toIRString(Ordering) << "\".";
+ report_fatal_error(OS.str());
}
- // [3]: TODO: these should eventually use .mmio<.atomic sem>; for now we drop the volatile semantics and preserve the atomic ones.
- // [4]: TODO: volatile atomics with order stronger than relaxed are currently unimplemented in sm_60 and older..
- if (!hasAtomics && N->isVolatile() && !(Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Monotonic)) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX does not support \"volatile atomic\" for orderings different than \"NotAtomic\" or \"Monotonic\" for sm_60 and older, but order is: \"" << toIRString(Ordering) << "\".";
- report_fatal_error(OS.str());
+ // [3]: TODO: these should eventually use .mmio<.atomic sem>; for now we drop
+ // the volatile semantics and preserve the atomic ones. [4]: TODO: volatile
+ // atomics with order stronger than relaxed are currently unimplemented in
+ // sm_60 and older..
+ if (!hasAtomics && N->isVolatile() &&
+ !(Ordering == AtomicOrdering::NotAtomic ||
+ Ordering == AtomicOrdering::Monotonic)) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX does not support \"volatile atomic\" for orderings different "
+ "than \"NotAtomic\" or \"Monotonic\" for sm_60 and older, but order "
+ "is: \""
+ << toIRString(Ordering) << "\".";
+ report_fatal_error(OS.str());
}
- // PTX volatile and PTX atomics are not available for statespace that differ from .generic, .global, or .shared.
- // The behavior of PTX volatile and PTX atomics is undefined if the generic address does not refer to a .global or .shared memory location.
- bool addrGenericOrGlobalOrShared = (CodeAddrSpace == NVPTX::PTXLdStInstCode::GENERIC
- || CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL
- || CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
- bool useRelaxedMMIO = hasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
+ // PTX volatile and PTX atomics are not available for statespace that differ
+ // from .generic, .global, or .shared. The behavior of PTX volatile and PTX
+ // atomics is undefined if the generic address does not refer to a .global or
+ // .shared memory location.
+ bool addrGenericOrGlobalOrShared =
+ (CodeAddrSpace == NVPTX::PTXLdStInstCode::GENERIC ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
+ bool useRelaxedMMIO =
+ hasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
switch (Ordering) {
- case AtomicOrdering::NotAtomic: return N->isVolatile() && addrGenericOrGlobalOrShared? NVPTX::PTXLdStInstCode::Volatile: NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::Monotonic:
- if (N->isVolatile()) return useRelaxedMMIO? NVPTX::PTXLdStInstCode::RelaxedMMIO: addrGenericOrGlobalOrShared? NVPTX::PTXLdStInstCode::Volatile: NVPTX::PTXLdStInstCode::NotAtomic;
- else return hasAtomics? NVPTX::PTXLdStInstCode::Relaxed: addrGenericOrGlobalOrShared? NVPTX::PTXLdStInstCode::Volatile: NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::Acquire:
- if (!N->readMem()) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports Acquire Ordering on reads: " << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
- }
- return addrGenericOrGlobalOrShared? NVPTX::PTXLdStInstCode::Acquire: NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::Release:
- if (!N->writeMem()) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports Release Ordering on writes: " << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
- }
- return addrGenericOrGlobalOrShared? NVPTX::PTXLdStInstCode::Release: NVPTX::PTXLdStInstCode::NotAtomic;
- case AtomicOrdering::AcquireRelease: {
+ case AtomicOrdering::NotAtomic:
+ return N->isVolatile() && addrGenericOrGlobalOrShared
+ ? NVPTX::PTXLdStInstCode::Volatile
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::Monotonic:
+ if (N->isVolatile())
+ return useRelaxedMMIO ? NVPTX::PTXLdStInstCode::RelaxedMMIO
+ : addrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ else
+ return hasAtomics ? NVPTX::PTXLdStInstCode::Relaxed
+ : addrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::Acquire:
+ if (!N->readMem()) {
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
- OS << "PTX only supports AcquireRelease Ordering on read-modify-write: " << N->getOperationName();
+ OS << "PTX only supports Acquire Ordering on reads: "
+ << N->getOperationName();
N->print(OS);
report_fatal_error(OS.str());
}
- case AtomicOrdering::SequentiallyConsistent:
- case AtomicOrdering::Unordered:
- default: {
- // TODO: support AcquireRelease and SequentiallyConsistent
+ return addrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Acquire
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::Release:
+ if (!N->writeMem()) {
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
- OS << "NVPTX backend does not support AtomicOrdering \"" << toIRString(Ordering) << "\" yet.";
+ OS << "PTX only supports Release Ordering on writes: "
+ << N->getOperationName();
+ N->print(OS);
report_fatal_error(OS.str());
}
+ return addrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Release
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::AcquireRelease: {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX only supports AcquireRelease Ordering on read-modify-write: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ case AtomicOrdering::SequentiallyConsistent:
+ case AtomicOrdering::Unordered:
+ default: {
+ // TODO: support AcquireRelease and SequentiallyConsistent
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "NVPTX backend does not support AtomicOrdering \""
+ << toIRString(Ordering) << "\" yet.";
+ report_fatal_error(OS.str());
+ }
}
report_fatal_error("unreachable");
@@ -1057,9 +1102,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Addr,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
@@ -1068,9 +1117,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
@@ -1085,9 +1139,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
@@ -1101,9 +1160,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), N1, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ N1,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
}
@@ -1208,9 +1271,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Addr,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
@@ -1233,9 +1300,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
@@ -1278,9 +1350,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else {
@@ -1323,9 +1400,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(CodeMemorySem, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Op1, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Op1,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
}
``````````
</details>
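For context beyond the formatting nits: the patch teaches the NVPTX backend to emit PTX memory-ordering qualifiers (`.weak`, `.volatile`, `.relaxed.sys`, `.acquire.sys`, `.release.sys`, `.mmio.relaxed.sys`) on loads and stores, following the lowering table in the diff above. A quick way to observe the new output is to run `llc` from a build that contains this patch on a small IR file. The snippet below is an illustrative sketch, not taken from the PR's tests; the file names and IR are mine:

```bash
# Illustrative IR: a relaxed (monotonic) and an acquire atomic load from the
# global address space (addrspace(1)).
cat > relaxed.ll <<'EOF'
define i32 @relaxed_load(ptr addrspace(1) %p) {
  %v = load atomic i32, ptr addrspace(1) %p monotonic, align 4
  ret i32 %v
}
EOF
cat > acquire.ll <<'EOF'
define i32 @acquire_load(ptr addrspace(1) %p) {
  %v = load atomic i32, ptr addrspace(1) %p acquire, align 4
  ret i32 %v
}
EOF

# sm_70 with PTX 6.0: per the lowering table, expect the new qualifiers,
# e.g. ld.relaxed.sys.global.u32 and ld.acquire.sys.global.u32.
llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -mattr=+ptx60 relaxed.ll -o -
llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 -mattr=+ptx60 acquire.ll -o -

# sm_60 and older: the relaxed load falls back to ld.volatile.global.u32,
# while the acquire load hits the new report_fatal_error path, since PTX
# does not support orderings stronger than monotonic before sm_70.
llc -mtriple=nvptx64-nvidia-cuda -mcpu=sm_60 relaxed.ll -o -
```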
https://github.com/llvm/llvm-project/pull/98022