[llvm] 2da30f8 - [NVPTX] Add Volta Load/Store Atomics (.relaxed, .acquire, .release) and Volatile (.mmio/.volatile) support (#98022)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 13:16:49 PDT 2024
Author: gonzalobg
Date: 2024-07-15T13:16:45-07:00
New Revision: 2da30f89869c493f1bc1ffbd8b458975103af0ac
URL: https://github.com/llvm/llvm-project/commit/2da30f89869c493f1bc1ffbd8b458975103af0ac
DIFF: https://github.com/llvm/llvm-project/commit/2da30f89869c493f1bc1ffbd8b458975103af0ac.diff
LOG: [NVPTX] Add Volta Load/Store Atomics (.relaxed, .acquire, .release) and Volatile (.mmio/.volatile) support (#98022)
This PR adds initial support for some of Volta's (sm_70) load/store
atomic and volatile/MMIO operations, hopefully without breaking any
preexisting code. Only relaxed, acquire, and release operations w/
volatile are handled.
This PR does not aim to add support for any of the following:
- syncscope support
- read atomic ops to const, param, grid param
- local memory atomics
- sequentially consistent atomics
- atomicrmw
- ...
Added:
llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
Modified:
llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
llvm/lib/Target/NVPTX/NVPTX.h
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXSubtarget.h
llvm/test/CodeGen/NVPTX/load-store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 380d878c1f532..a004d64c21cc6 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -227,9 +227,33 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
if (Modifier) {
const MCOperand &MO = MI->getOperand(OpNum);
int Imm = (int) MO.getImm();
- if (!strcmp(Modifier, "volatile")) {
- if (Imm)
+ if (!strcmp(Modifier, "sem")) {
+ switch (Imm) {
+ case NVPTX::PTXLdStInstCode::NotAtomic:
+ break;
+ case NVPTX::PTXLdStInstCode::Volatile:
O << ".volatile";
+ break;
+ case NVPTX::PTXLdStInstCode::Relaxed:
+ O << ".relaxed.sys";
+ break;
+ case NVPTX::PTXLdStInstCode::Acquire:
+ O << ".acquire.sys";
+ break;
+ case NVPTX::PTXLdStInstCode::Release:
+ O << ".release.sys";
+ break;
+ case NVPTX::PTXLdStInstCode::RelaxedMMIO:
+ O << ".mmio.relaxed.sys";
+ break;
+ default:
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "NVPTX LdStCode Printer does not support \"" << Imm
+ << "\" sem modifier.";
+ report_fatal_error(OS.str());
+ break;
+ }
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
case NVPTX::PTXLdStInstCode::GLOBAL:
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b0cb24c63c3ce..3c7167b157025 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -107,6 +107,14 @@ enum LoadStore {
};
namespace PTXLdStInstCode {
+enum MemorySemantic {
+ NotAtomic = 0, // PTX calls these: "Weak"
+ Volatile = 1,
+ Relaxed = 2,
+ Acquire = 3,
+ Release = 4,
+ RelaxedMMIO = 5
+};
enum AddressSpace {
GENERIC = 0,
GLOBAL = 1,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 11193c11ede3b..371ec8596ef63 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -714,6 +714,170 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
+static unsigned int getCodeMemorySemantic(MemSDNode *N,
+ const NVPTXSubtarget *Subtarget) {
+ AtomicOrdering Ordering = N->getSuccessOrdering();
+ auto CodeAddrSpace = getCodeAddrSpace(N);
+
+ bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
+ bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
+
+ // TODO: lowering for SequentiallyConsistent Operations: for now, we error.
+ // TODO: lowering for AcquireRelease Operations: for now, we error.
+ //
+
+ // clang-format off
+
+ // Lowering for non-SequentiallyConsistent Operations
+ //
+ // | Atomic | Volatile | Statespace | PTX sm_60- | PTX sm_70+ |
+ // |---------|----------|--------------------|------------|------------------------------|
+ // | No | No | All | plain | .weak |
+ // | No | Yes | Generic,Shared, | .volatile | .volatile |
+ // | | | Global [0] | | |
+ // | No | Yes | Local,Const,Param | plain [1] | .weak [1] |
+ // | Relaxed | No | Generic,Shared, | | |
+ // | | | Global [0] | .volatile | <atomic sem> |
+ // | Other | No | Generic,Shared, | Error [2] | <atomic sem> |
+ // | | | Global [0] | | |
+ // | Yes | No | Local,Const,Param | plain [1] | .weak [1] |
+ // | Relaxed | Yes | Generic,Shared [0] | .volatile | .volatile |
+ // | Relaxed | Yes | Global [0] | .volatile | .mmio.relaxed.sys (PTX 8.2+) |
+ // | | | | | or .volatile (PTX 8.1-) |
+ // | Relaxed | Yes | Local,Const,Param | plain [1] | .weak [1] |
+ // | Other | Yes | Generic, Shared, | Error [2] | <atomic sem> [3] |
+ // | | | / Global [0] | | |
+
+ // clang-format on
+
+ // [0]: volatile and atomics are only supported on global or shared
+ // memory locations, accessed via generic/shared/global pointers.
+ // MMIO is only supported on global memory locations,
+ // accessed via generic/global pointers.
+ // TODO: Implement MMIO access via generic pointer to global.
+ // Currently implemented for global pointers only.
+
+ // [1]: Lowering volatile/atomic operations to non-volatile/non-atomic
+ // PTX instructions fails to preserve their C++ side-effects.
+ //
+ // Example (https://github.com/llvm/llvm-project/issues/62057):
+ //
+ // void example() {
+ // std::atomic<bool> True = true;
+ // while (True.load(std::memory_order_relaxed));
+ // }
+ //
+ // A C++ program that calls "example" is well-defined: the infinite loop
+ // performs an atomic operation. By lowering volatile/atomics to
+ // "weak" memory operations, we are transforming the above into:
+ //
+ // void undefined_behavior() {
+ // bool True = true;
+ // while (True);
+ // }
+ //
+ // which exhibits undefined behavior in both C++ and PTX.
+ //
+ // Calling "example" in CUDA C++ compiled for sm_60- exhibits undefined
+ // behavior due to lack of Independent Forward Progress. Lowering these
+ // to weak memory operations in sm_60- is therefore fine.
+ //
+ // TODO: lower atomic and volatile operations to memory locations
+ // in local, const, and param to two PTX instructions in sm_70+:
+ // - the "weak" memory instruction we are currently lowering to, and
+ // - some other instruction that preserves the side-effect, e.g.,
+ // a dead dummy volatile load.
+
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::PARAM) {
+ return NVPTX::PTXLdStInstCode::NotAtomic;
+ }
+
+ // [2]: Atomics with Ordering
diff erent than Relaxed are not supported on
+ // sm_60 and older; this includes volatile atomics.
+ if (!(Ordering == AtomicOrdering::NotAtomic ||
+ Ordering == AtomicOrdering::Monotonic) &&
+ !HasMemoryOrdering) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX does not support \"atomic\" for orderings
diff erent than"
+ "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \""
+ << toIRString(Ordering) << "\".";
+ report_fatal_error(OS.str());
+ }
+
+ // [3]: TODO: these should eventually use .mmio<.atomic sem>; for now we drop
+ // the volatile semantics and preserve the atomic ones.
+
+ // PTX volatile and PTX atomics are not available for statespace that
diff er
+ // from .generic, .global, or .shared. The behavior of PTX volatile and PTX
+ // atomics is undefined if the generic address does not refer to a .global or
+ // .shared memory location.
+ bool AddrGenericOrGlobalOrShared =
+ (CodeAddrSpace == NVPTX::PTXLdStInstCode::GENERIC ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL ||
+ CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
+ bool UseRelaxedMMIO =
+ HasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
+
+ switch (Ordering) {
+ case AtomicOrdering::NotAtomic:
+ return N->isVolatile() && AddrGenericOrGlobalOrShared
+ ? NVPTX::PTXLdStInstCode::Volatile
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::Monotonic:
+ if (N->isVolatile())
+ return UseRelaxedMMIO ? NVPTX::PTXLdStInstCode::RelaxedMMIO
+ : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ else
+ return HasMemoryOrdering ? NVPTX::PTXLdStInstCode::Relaxed
+ : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::Acquire:
+ if (!N->readMem()) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX only supports Acquire Ordering on reads: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Acquire
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::Release:
+ if (!N->writeMem()) {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX only supports Release Ordering on writes: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Release
+ : NVPTX::PTXLdStInstCode::NotAtomic;
+ case AtomicOrdering::AcquireRelease: {
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "PTX only supports AcquireRelease Ordering on read-modify-write: "
+ << N->getOperationName();
+ N->print(OS);
+ report_fatal_error(OS.str());
+ }
+ case AtomicOrdering::SequentiallyConsistent:
+ case AtomicOrdering::Unordered:
+ // TODO: support AcquireRelease and SequentiallyConsistent
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "NVPTX backend does not support AtomicOrdering \""
+ << toIRString(Ordering) << "\" yet.";
+ report_fatal_error(OS.str());
+ }
+
+ llvm_unreachable("unexpected unhandled case");
+}
+
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
@@ -916,32 +1080,18 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (!LoadedVT.isSimple())
return false;
- AtomicOrdering Ordering = LD->getSuccessOrdering();
- // In order to lower atomic loads with stronger guarantees we would need to
- // use load.acquire or insert fences. However these features were only added
- // with PTX ISA 6.0 / sm_70.
- // TODO: Check if we can actually use the new instructions and implement them.
- if (isStrongerThanMonotonic(Ordering))
- return false;
-
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
+ // Memory Semantic Setting
+ unsigned int CodeMemorySem = getCodeMemorySemantic(LD, Subtarget);
+
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
- // Volatile Setting
- // - .volatile is only available for .global and .shared
- // - .volatile has the same memory synchronization semantics as .relaxed.sys
- bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
- if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
- isVolatile = false;
-
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
@@ -982,9 +1132,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Addr,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
@@ -993,9 +1147,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
@@ -1010,9 +1169,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ Base,
+ Offset,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
@@ -1026,9 +1190,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl), getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl), N1, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
+ getI32Imm(CodeAddrSpace, dl),
+ getI32Imm(vecType, dl),
+ getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl),
+ N1,
+ Chain};
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
}
@@ -1065,13 +1233,8 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
- // Volatile Setting
- // - .volatile is only availalble for .global and .shared
- bool IsVolatile = MemSD->isVolatile();
- if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
- IsVolatile = false;
+ // Memory Semantic Setting
+ unsigned int CodeMemorySem = getCodeMemorySemantic(MemSD, Subtarget);
// Vector Setting
MVT SimpleVT = LoadedVT.getSimpleVT();
@@ -1138,9 +1301,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Addr, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Addr,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
@@ -1163,9 +1330,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
@@ -1208,9 +1380,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Base,
+ Offset,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else {
@@ -1253,9 +1430,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL), Op1, Chain };
+ SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL),
+ getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL),
+ Op1,
+ Chain};
LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
}
@@ -1698,27 +1879,13 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!StoreVT.isSimple())
return false;
- AtomicOrdering Ordering = ST->getSuccessOrdering();
- // In order to lower atomic loads with stronger guarantees we would need to
- // use store.release or insert fences. However these features were only added
- // with PTX ISA 6.0 / sm_70.
- // TODO: Check if we can actually use the new instructions and implement them.
- if (isStrongerThanMonotonic(Ordering))
- return false;
-
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
- // Volatile Setting
- // - .volatile is only available for .global and .shared
- // - .volatile has the same memory synchronization semantics as .relaxed.sys
- bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
- if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
- isVolatile = false;
+ // Memory Semantic Setting
+ unsigned int CodeMemorySem = getCodeMemorySemantic(ST, Subtarget);
// Vector Setting
MVT SimpleVT = StoreVT.getSimpleVT();
@@ -1755,7 +1922,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = {Value,
- getI32Imm(isVolatile, dl),
+ getI32Imm(CodeMemorySem, dl),
getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl),
getI32Imm(toType, dl),
@@ -1772,7 +1939,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = {Value,
- getI32Imm(isVolatile, dl),
+ getI32Imm(CodeMemorySem, dl),
getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl),
getI32Imm(toType, dl),
@@ -1797,7 +1964,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
return false;
SDValue Ops[] = {Value,
- getI32Imm(isVolatile, dl),
+ getI32Imm(CodeMemorySem, dl),
getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl),
getI32Imm(toType, dl),
@@ -1819,7 +1986,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = {Value,
- getI32Imm(isVolatile, dl),
+ getI32Imm(CodeMemorySem, dl),
getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl),
getI32Imm(toType, dl),
@@ -1858,13 +2025,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
- // Volatile Setting
- // - .volatile is only availalble for .global and .shared
- bool IsVolatile = MemSD->isVolatile();
- if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
- CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
- IsVolatile = false;
+ // Memory Semantic Setting
+ unsigned int CodeMemorySem = getCodeMemorySemantic(MemSD, Subtarget);
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
@@ -1906,7 +2068,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ToTypeWidth = 32;
}
- StOps.push_back(getI32Imm(IsVolatile, DL));
+ StOps.push_back(getI32Imm(CodeMemorySem, DL));
StOps.push_back(getI32Imm(CodeAddrSpace, DL));
StOps.push_back(getI32Imm(VecType, DL));
StOps.push_back(getI32Imm(ToType, DL));
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 827febe845a4c..f37822f764bed 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2941,39 +2941,39 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
multiclass LD<NVPTXRegClass regclass> {
def _avar : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _areg : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _areg_64 : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _ari : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
def _asi : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
}
@@ -2989,39 +2989,39 @@ let mayLoad=1, hasSideEffects=0 in {
multiclass ST<NVPTXRegClass regclass> {
def _avar : NVPTXInst<
(outs),
- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ (ins regclass:$src, LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr], $src;", []>;
def _areg : NVPTXInst<
(outs),
- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
+ (ins regclass:$src, LdStCode:$sem, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr], $src;", []>;
def _areg_64 : NVPTXInst<
(outs),
- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ (ins regclass:$src, LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr], $src;", []>;
def _ari : NVPTXInst<
(outs),
- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ (ins regclass:$src, LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr+$offset], $src;", []>;
def _ari_64 : NVPTXInst<
(outs),
- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ (ins regclass:$src, LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr+$offset], $src;", []>;
def _asi : NVPTXInst<
(outs),
- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ (ins regclass:$src, LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
" \t[$addr+$offset], $src;", []>;
}
@@ -3040,75 +3040,75 @@ let mayStore=1, hasSideEffects=0 in {
multiclass LD_VEC<NVPTXRegClass regclass> {
def _v2_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr];", []>;
def _v2_areg : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr];", []>;
def _v2_areg_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr];", []>;
def _v2_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
def _v2_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
def _v2_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2}}, [$addr+$offset];", []>;
def _v4_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
def _v4_areg : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
def _v4_areg_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
def _v4_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
def _v4_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
def _v4_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
- (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
- "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
@@ -3123,84 +3123,84 @@ let mayLoad=1, hasSideEffects=0 in {
multiclass ST_VEC<NVPTXRegClass regclass> {
def _v2_avar : NVPTXInst<
(outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr], {{$src1, $src2}};", []>;
def _v2_areg : NVPTXInst<
(outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr], {{$src1, $src2}};", []>;
def _v2_areg_64 : NVPTXInst<
(outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr], {{$src1, $src2}};", []>;
def _v2_ari : NVPTXInst<
(outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2}};", []>;
def _v2_ari_64 : NVPTXInst<
(outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2}};", []>;
def _v2_asi : NVPTXInst<
(outs),
- (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ (ins regclass:$src1, regclass:$src2, LdStCode:$sem, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2}};", []>;
def _v4_avar : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_areg : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_areg_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_ari : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
def _v4_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
- LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
- "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}"
+ "st${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}"
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 3ca4c1a24c79a..8df41913ff12e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -78,13 +78,18 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasAtomBitwise64() const { return SmVersion >= 32; }
bool hasAtomMinMax64() const { return SmVersion >= 32; }
bool hasLDG() const { return SmVersion >= 32; }
- inline bool hasHWROT32() const { return SmVersion >= 32; }
+ bool hasHWROT32() const { return SmVersion >= 32; }
bool hasImageHandles() const;
bool hasFP16Math() const { return SmVersion >= 53; }
bool hasBF16Math() const { return SmVersion >= 80; }
bool allowFP16Math() const;
bool hasMaskOperator() const { return PTXVersion >= 71; }
bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
+ // Does SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
+ // release, acq_rel, sc) ?
+ bool hasMemoryOrdering() const { return SmVersion >= 70 && PTXVersion >= 60; }
+ // Does SM & PTX support atomic relaxed MMIO operations ?
+ bool hasRelaxedMMIO() const { return SmVersion >= 70 && PTXVersion >= 82; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
// GPUs with "a" suffix have include architecture-accelerated features that
diff --git a/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll b/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
new file mode 100644
index 0000000000000..7cdced1778a53
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
@@ -0,0 +1,951 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s
+; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
+
+; CHECK-LABEL: generic_plain
+define void @generic_plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
+ ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, ptr %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, ptr %a
+
+ ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, ptr %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, ptr %b
+
+ ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, ptr %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, ptr %c
+
+ ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, ptr %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, ptr %d
+
+ ; CHECK: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load float, ptr %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store float %e.add, ptr %c
+
+ ; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load double, ptr %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store double %f.add, ptr %c
+
+ ret void
+}
+
+; CHECK-LABEL: generic_volatile
+define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
+ ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, ptr %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, ptr %a
+
+ ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, ptr %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, ptr %b
+
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, ptr %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, ptr %c
+
+ ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, ptr %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, ptr %d
+
+ ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load volatile float, ptr %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store volatile float %e.add, ptr %c
+
+ ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load volatile double, ptr %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store volatile double %f.add, ptr %c
+
+ ret void
+}
+
+; CHECK-LABEL: generic_monotonic
+define void @generic_monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr %a monotonic, align 1
+
+ ; CHECK: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr %b monotonic, align 2
+
+ ; CHECK: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr %c monotonic, align 4
+
+ ; CHECK: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr %d monotonic, align 8
+
+ ; CHECK: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr %e monotonic, align 4
+
+ ; CHECK: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: generic_acq_rel
+define void @generic_acq_rel(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr %a acquire, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr %a release, align 1
+
+ ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr %b acquire, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr %b release, align 2
+
+ ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr %c acquire, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr %c release, align 4
+
+ ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr %d acquire, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr %d release, align 8
+
+ ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr %e acquire, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr %e release, align 4
+
+ ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr %e acquire, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr %e release, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: generic_monotonic_volatile
+define void @generic_monotonic_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr %a monotonic, align 1
+
+ ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr %b monotonic, align 2
+
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr %c monotonic, align 4
+
+ ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr %d monotonic, align 8
+
+ ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr %e monotonic, align 4
+
+ ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr %e monotonic, align 8
+
+ ret void
+}
+
+;; global statespace
+
+; CHECK-LABEL: global_plain
+define void @global_plain(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
+ ; CHECK: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, ptr addrspace(1) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, ptr addrspace(1) %a
+
+ ; CHECK: ld.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, ptr addrspace(1) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, ptr addrspace(1) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, ptr addrspace(1) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load float, ptr addrspace(1) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store float %e.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load double, ptr addrspace(1) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store double %f.add, ptr addrspace(1) %c
+
+ ret void
+}
+
+; CHECK-LABEL: global_volatile
+define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
+ ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, ptr addrspace(1) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, ptr addrspace(1) %a
+
+ ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, ptr addrspace(1) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, ptr addrspace(1) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, ptr addrspace(1) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load volatile float, ptr addrspace(1) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store volatile float %e.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load volatile double, ptr addrspace(1) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store volatile double %f.add, ptr addrspace(1) %c
+
+ ret void
+}
+
+; CHECK-LABEL: global_monotonic
+define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
+
+ ; CHECK: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
+
+ ; CHECK: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
+
+ ; CHECK: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
+
+ ; CHECK: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
+
+ ; CHECK: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: global_monotonic_volatile
+define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
+
+ ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
+
+ ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
+
+ ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
+
+ ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
+
+ ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: global_acq_rel
+define void @global_acq_rel(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(1) %a acquire, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(1) %a release, align 1
+
+ ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(1) %b acquire, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(1) %b release, align 2
+
+ ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(1) %c acquire, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(1) %c release, align 4
+
+ ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(1) %d acquire, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(1) %d release, align 8
+
+ ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(1) %e acquire, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(1) %e release, align 4
+
+ ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(1) %e acquire, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(1) %e release, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: global_acq_rel_volatile
+define void @global_acq_rel_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(1) %a acquire, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(1) %a release, align 1
+
+ ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(1) %b acquire, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(1) %b release, align 2
+
+ ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(1) %c acquire, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(1) %c release, align 4
+
+ ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(1) %d acquire, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(1) %d release, align 8
+
+ ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(1) %e acquire, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(1) %e release, align 4
+
+ ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(1) %e acquire, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(1) %e release, align 8
+
+ ret void
+}
+
+;; shared statespace
+
+; CHECK-LABEL: shared_plain
+define void @shared_plain(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
+ ; CHECK: ld.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, ptr addrspace(3) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, ptr addrspace(3) %a
+
+ ; CHECK: ld.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, ptr addrspace(3) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, ptr addrspace(3) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, ptr addrspace(3) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load float, ptr addrspace(3) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store float %e.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load double, ptr addrspace(3) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store double %f.add, ptr addrspace(3) %c
+
+ ret void
+}
+
+; CHECK-LABEL: shared_volatile
+define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
+ ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, ptr addrspace(3) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, ptr addrspace(3) %a
+
+ ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, ptr addrspace(3) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, ptr addrspace(3) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, ptr addrspace(3) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load volatile float, ptr addrspace(3) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store volatile float %e.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load volatile double, ptr addrspace(3) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store volatile double %f.add, ptr addrspace(3) %c
+
+ ret void
+}
+
+; CHECK-LABEL: shared_monotonic
+define void @shared_monotonic(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
+
+ ; CHECK: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
+
+ ; CHECK: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
+
+ ; CHECK: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
+
+ ; CHECK: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
+
+ ; CHECK: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: shared_monotonic_volatile
+define void @shared_monotonic_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
+
+ ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(3) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(3) %b monotonic, align 2
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(3) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(3) %c monotonic, align 4
+
+ ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(3) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(3) %d monotonic, align 8
+
+ ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(3) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(3) %e monotonic, align 4
+
+ ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(3) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(3) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: shared_acq_rel
+define void @shared_acq_rel(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(3) %a acquire, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(3) %a release, align 1
+
+ ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(3) %b acquire, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(3) %b release, align 2
+
+ ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(3) %c acquire, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(3) %c release, align 4
+
+ ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(3) %d acquire, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(3) %d release, align 8
+
+ ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(3) %e acquire, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(3) %e release, align 4
+
+ ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(3) %e acquire, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(3) %e release, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: shared_acq_rel_volatile
+define void @shared_acq_rel_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(3) %a acquire, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(3) %a release, align 1
+
+ ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(3) %b acquire, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(3) %b release, align 2
+
+ ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(3) %c acquire, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(3) %c release, align 4
+
+ ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(3) %d acquire, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(3) %d release, align 8
+
+ ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(3) %e acquire, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(3) %e release, align 4
+
+ ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(3) %e acquire, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(3) %e release, align 8
+
+ ret void
+}
+
+;; local statespace
+
+; CHECK-LABEL: local_plain
+define void @local_plain(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, ptr addrspace(5) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, ptr addrspace(5) %a
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, ptr addrspace(5) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, ptr addrspace(5) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, ptr addrspace(5) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load float, ptr addrspace(5) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store float %e.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load double, ptr addrspace(5) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store double %f.add, ptr addrspace(5) %c
+
+ ret void
+}
+
+; CHECK-LABEL: local_volatile
+define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, ptr addrspace(5) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, ptr addrspace(5) %a
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, ptr addrspace(5) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, ptr addrspace(5) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, ptr addrspace(5) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load volatile float, ptr addrspace(5) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store volatile float %e.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load volatile double, ptr addrspace(5) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store volatile double %f.add, ptr addrspace(5) %c
+
+ ret void
+}
+
+; CHECK-LABEL: local_monotonic
+define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(5) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(5) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(5) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(5) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(5) %e monotonic, align 4
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(5) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(5) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: local_monotonic_volatile
+define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(5) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(5) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(5) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(5) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(5) %e monotonic, align 4
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(5) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(5) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: local_acq_rel
+define void @local_acq_rel(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(5) %a acquire, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(5) %a release, align 1
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(5) %b acquire, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(5) %b release, align 2
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(5) %c acquire, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(5) %c release, align 4
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(5) %d acquire, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(5) %d release, align 8
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(5) %e acquire, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(5) %e release, align 4
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(5) %e acquire, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(5) %e release, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: local_acq_rel_volatile
+define void @local_acq_rel_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(5) %a acquire, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(5) %a release, align 1
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(5) %b acquire, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(5) %b release, align 2
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(5) %c acquire, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(5) %c release, align 4
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(5) %d acquire, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(5) %d release, align 8
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(5) %e acquire, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(5) %e release, align 4
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(5) %e acquire, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(5) %e release, align 8
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index c477bd9e744cd..27065f5eca9f4 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -1,8 +1,10 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
-; CHECK-LABEL: plain
-define void @plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
+; generic statespace
+
+; CHECK-LABEL: generic_plain
+define void @generic_plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load i8, ptr %a
%a.add = add i8 %a.load, 1
@@ -42,8 +44,8 @@ define void @plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
ret void
}
-; CHECK-LABEL: volatile
-define void @volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
+; CHECK-LABEL: generic_volatile
+define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load volatile i8, ptr %a
%a.add = add i8 %a.load, 1
@@ -83,8 +85,8 @@ define void @volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
ret void
}
-; CHECK-LABEL: monotonic
-define void @monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+; CHECK-LABEL: generic_monotonic
+define void @generic_monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
@@ -123,3 +125,542 @@ define void @monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_add
ret void
}
+
+; CHECK-LABEL: generic_monotonic_volatile
+define void @generic_monotonic_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr %a monotonic, align 1
+
+ ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr %b monotonic, align 2
+
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr %c monotonic, align 4
+
+ ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr %d monotonic, align 8
+
+ ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr %e monotonic, align 4
+
+ ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr %e monotonic, align 8
+
+ ret void
+}
+
+;; global statespace
+
+; CHECK-LABEL: global_plain
+define void @global_plain(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
+ ; CHECK: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, ptr addrspace(1) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, ptr addrspace(1) %a
+
+ ; CHECK: ld.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, ptr addrspace(1) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, ptr addrspace(1) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, ptr addrspace(1) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load float, ptr addrspace(1) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store float %e.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load double, ptr addrspace(1) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store double %f.add, ptr addrspace(1) %c
+
+ ret void
+}
+
+; CHECK-LABEL: global_volatile
+define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
+ ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, ptr addrspace(1) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, ptr addrspace(1) %a
+
+ ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, ptr addrspace(1) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, ptr addrspace(1) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, ptr addrspace(1) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load volatile float, ptr addrspace(1) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store volatile float %e.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load volatile double, ptr addrspace(1) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store volatile double %f.add, ptr addrspace(1) %c
+
+ ret void
+}
+
+; CHECK-LABEL: global_monotonic
+define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
+
+ ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
+
+ ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
+
+ ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
+
+ ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: global_monotonic_volatile
+define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
+
+ ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
+
+ ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
+
+ ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
+
+ ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
+
+ ret void
+}
+
+;; shared statespace
+
+; CHECK-LABEL: shared_plain
+define void @shared_plain(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
+ ; CHECK: ld.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, ptr addrspace(3) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, ptr addrspace(3) %a
+
+ ; CHECK: ld.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, ptr addrspace(3) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, ptr addrspace(3) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, ptr addrspace(3) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load float, ptr addrspace(3) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store float %e.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load double, ptr addrspace(3) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store double %f.add, ptr addrspace(3) %c
+
+ ret void
+}
+
+; CHECK-LABEL: shared_volatile
+define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
+ ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, ptr addrspace(3) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, ptr addrspace(3) %a
+
+ ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, ptr addrspace(3) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, ptr addrspace(3) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, ptr addrspace(3) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load volatile float, ptr addrspace(3) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store volatile float %e.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load volatile double, ptr addrspace(3) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store volatile double %f.add, ptr addrspace(3) %c
+
+ ret void
+}
+
+; CHECK-LABEL: shared_monotonic
+define void @shared_monotonic(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
+
+ ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
+
+ ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
+
+ ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
+
+ ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: shared_monotonic_volatile
+define void @shared_monotonic_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
+
+ ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(3) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(3) %b monotonic, align 2
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(3) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(3) %c monotonic, align 4
+
+ ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(3) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(3) %d monotonic, align 8
+
+ ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(3) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(3) %e monotonic, align 4
+
+ ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(3) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(3) %e monotonic, align 8
+
+ ret void
+}
+
+;; local statespace
+
+; CHECK-LABEL: local_plain
+define void @local_plain(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load i8, ptr addrspace(5) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i8 %a.add, ptr addrspace(5) %a
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load i16, ptr addrspace(5) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store i16 %b.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load i32, ptr addrspace(5) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store i32 %c.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load i64, ptr addrspace(5) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store i64 %d.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load float, ptr addrspace(5) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store float %e.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load double, ptr addrspace(5) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store double %f.add, ptr addrspace(5) %c
+
+ ret void
+}
+
+; CHECK-LABEL: local_volatile
+define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load volatile i8, ptr addrspace(5) %a
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i8 %a.add, ptr addrspace(5) %a
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load volatile i16, ptr addrspace(5) %b
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store volatile i16 %b.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load volatile i32, ptr addrspace(5) %c
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile i32 %c.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load volatile i64, ptr addrspace(5) %d
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store volatile i64 %d.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load volatile float, ptr addrspace(5) %c
+ %e.add = fadd float %e.load, 1.
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store volatile float %e.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load volatile double, ptr addrspace(5) %c
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store volatile double %f.add, ptr addrspace(5) %c
+
+ ret void
+}
+
+; CHECK-LABEL: local_monotonic
+define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(5) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(5) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(5) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(5) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(5) %e monotonic, align 4
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(5) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(5) %e monotonic, align 8
+
+ ret void
+}
+
+; CHECK-LABEL: local_monotonic_volatile
+define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
+ %a.add = add i8 %a.load, 1
+ ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+
+ ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(5) %b monotonic, align 2
+ %b.add = add i16 %b.load, 1
+ ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(5) %c monotonic, align 4
+ %c.add = add i32 %c.load, 1
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+
+ ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(5) %d monotonic, align 8
+ %d.add = add i64 %d.load, 1
+ ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+
+ ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(5) %e monotonic, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(5) %e monotonic, align 4
+
+ ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(5) %e monotonic, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(5) %e monotonic, align 8
+
+ ret void
+}
More information about the llvm-commits
mailing list