[llvm] f55abd5 - [NVPTX] Add Volta Atomic SequentiallyConsistent Load and Store Operations (#98551)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 6 10:32:54 PDT 2024
Author: gonzalobg
Date: 2024-08-06T10:32:51-07:00
New Revision: f55abd545d9ec26e14c7a45727c16fc97b46db3c
URL: https://github.com/llvm/llvm-project/commit/f55abd545d9ec26e14c7a45727c16fc97b46db3c
DIFF: https://github.com/llvm/llvm-project/commit/f55abd545d9ec26e14c7a45727c16fc97b46db3c.diff
LOG: [NVPTX] Add Volta Atomic SequentiallyConsistent Load and Store Operations (#98551)
This PR builds on #98022.
It adds support for Volta's SequentiallyConsistent Load and Store
operations at system scope.
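For illustration: on sm_70+ (PTX 6.0+), a system-scope seq_cst load or
store now lowers to a fence.sc.sys followed by an acquire load or a
release store. A minimal sketch in LLVM IR, with the expected PTX (as
exercised by the load-store-sm-70.ll updates below) in comments; the
pointer %p is a placeholder:

  ; PTX: fence.sc.sys
  ; PTX: ld.acquire.sys.u32
  %v = load atomic i32, ptr %p seq_cst, align 4
  ; PTX: fence.sc.sys
  ; PTX: st.release.sys.u32
  store atomic i32 %v, ptr %p seq_cst, align 4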
Added:
Modified:
llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
llvm/lib/Target/NVPTX/NVPTX.h
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXUtilities.h
llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
llvm/test/CodeGen/NVPTX/load-store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index a004d64c21cc69..5b568b0487b45a 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -13,12 +13,14 @@
#include "MCTargetDesc/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
+#include "NVPTXUtilities.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormattedStream.h"
#include <cctype>
using namespace llvm;
@@ -228,31 +230,29 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
const MCOperand &MO = MI->getOperand(OpNum);
int Imm = (int) MO.getImm();
if (!strcmp(Modifier, "sem")) {
- switch (Imm) {
- case NVPTX::PTXLdStInstCode::NotAtomic:
+ auto Ordering = NVPTX::Ordering(Imm);
+ switch (Ordering) {
+ case NVPTX::Ordering::NotAtomic:
break;
- case NVPTX::PTXLdStInstCode::Volatile:
+ case NVPTX::Ordering::Volatile:
O << ".volatile";
break;
- case NVPTX::PTXLdStInstCode::Relaxed:
+ case NVPTX::Ordering::Relaxed:
O << ".relaxed.sys";
break;
- case NVPTX::PTXLdStInstCode::Acquire:
+ case NVPTX::Ordering::Acquire:
O << ".acquire.sys";
break;
- case NVPTX::PTXLdStInstCode::Release:
+ case NVPTX::Ordering::Release:
O << ".release.sys";
break;
- case NVPTX::PTXLdStInstCode::RelaxedMMIO:
+ case NVPTX::Ordering::RelaxedMMIO:
O << ".mmio.relaxed.sys";
break;
default:
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "NVPTX LdStCode Printer does not support \"" << Imm
- << "\" sem modifier.";
- report_fatal_error(OS.str());
- break;
+ report_fatal_error(formatv(
+ "NVPTX LdStCode Printer does not support \"{}\" sem modifier.",
+ OrderingToCString(Ordering)));
}
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 3c7167b1570254..f6f6acb9e13c90 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -16,6 +16,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
namespace llvm {
@@ -106,15 +107,25 @@ enum LoadStore {
isStoreShift = 6
};
-namespace PTXLdStInstCode {
-enum MemorySemantic {
- NotAtomic = 0, // PTX calls these: "Weak"
- Volatile = 1,
- Relaxed = 2,
- Acquire = 3,
- Release = 4,
- RelaxedMMIO = 5
+// Extends LLVM AtomicOrdering with PTX Orderings:
+using OrderingUnderlyingType = unsigned int;
+enum Ordering : OrderingUnderlyingType {
+ NotAtomic = (OrderingUnderlyingType)
+ AtomicOrdering::NotAtomic, // PTX calls these: "Weak"
+ // Unordered = 1, // NVPTX maps LLVM Unordered to Relaxed
+ Relaxed = (OrderingUnderlyingType)AtomicOrdering::Monotonic,
+ // Consume = 3, // Unimplemented in LLVM; NVPTX would map to "Acquire"
+ Acquire = (OrderingUnderlyingType)AtomicOrdering::Acquire,
+ Release = (OrderingUnderlyingType)AtomicOrdering::Release,
+ // AcquireRelease = 6, // TODO
+ SequentiallyConsistent =
+ (OrderingUnderlyingType)AtomicOrdering::SequentiallyConsistent,
+ Volatile = SequentiallyConsistent + 1,
+ RelaxedMMIO = Volatile + 1,
+ LAST = RelaxedMMIO
};
+
+namespace PTXLdStInstCode {
enum AddressSpace {
GENERIC = 0,
GLOBAL = 1,
@@ -134,7 +145,7 @@ enum VecType {
V2 = 2,
V4 = 4
};
-}
+} // namespace PTXLdStInstCode
/// PTXCvtMode - Conversion code enumeration
namespace PTXCvtMode {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 96456ad0547ea7..25c198f0121e59 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -714,21 +715,28 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
-static unsigned int getCodeMemorySemantic(MemSDNode *N,
- const NVPTXSubtarget *Subtarget) {
+namespace {
+
+struct OperationOrderings {
+ NVPTX::Ordering InstructionOrdering, FenceOrdering;
+ OperationOrderings(NVPTX::Ordering IO = NVPTX::Ordering::NotAtomic,
+ NVPTX::Ordering FO = NVPTX::Ordering::NotAtomic)
+ : InstructionOrdering(IO), FenceOrdering(FO) {}
+};
+
+static OperationOrderings
+getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
AtomicOrdering Ordering = N->getSuccessOrdering();
auto CodeAddrSpace = getCodeAddrSpace(N);
bool HasMemoryOrdering = Subtarget->hasMemoryOrdering();
bool HasRelaxedMMIO = Subtarget->hasRelaxedMMIO();
- // TODO: lowering for SequentiallyConsistent Operations: for now, we error.
- // TODO: lowering for AcquireRelease Operations: for now, we error.
- //
-
// clang-format off
- // Lowering for non-SequentiallyConsistent Operations
+ // Lowering for Load/Store Operations (note: AcquireRelease Loads or Stores are an error).
+ // Note: uses of Relaxed in the Atomic column of this table refer
+ // to LLVM AtomicOrdering::Monotonic.
//
// | Atomic | Volatile | Statespace | PTX sm_60- | PTX sm_70+ |
// |---------|----------|--------------------|------------|------------------------------|
@@ -749,6 +757,25 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
// | Other | Yes | Generic, Shared, | Error [2] | <atomic sem> [3] |
// | | | / Global [0] | | |
+ // Lowering of CUDA C++ SequentiallyConsistent Operations and Fences to PTX
+ // by following the ABI proven sound in:
+ // Lustig et al, A Formal Analysis of the NVIDIA PTX Memory Consistency Model, ASPLOS’19.
+ // https://dl.acm.org/doi/pdf/10.1145/3297858.3304043
+ //
+ // | CUDA C++ Atomic Operation or Atomic Fence | PTX Atomic Operation or Fence |
+ // |------------------------------------------------------|-------------------------------|
+ // | cuda::atomic_thread_fence | fence.sc.<scope>; |
+ // | (memory_order_seq_cst, cuda::thread_scope_<scope>) | |
+ // |------------------------------------------------------|-------------------------------|
+ // | cuda::atomic_load | fence.sc.<scope>; |
+ // | (memory_order_seq_cst, cuda::thread_scope_<scope>) | ld.acquire.<scope>; |
+ // |------------------------------------------------------|-------------------------------|
+ // | cuda::atomic_store | fence.sc.<scope>; |
+ // | (memory_order_seq_cst, cuda::thread_scope_<scope>) | st.release.<scope>; |
+ // |------------------------------------------------------|-------------------------------|
+ // | cuda::atomic_fetch_<op> | fence.sc.<scope>; |
+ // | (memory_order_seq_cst, cuda::thread_scope_<scope>) | atom.acq_rel.<scope>; |
+
// clang-format on
// [0]: volatile and atomics are only supported on global or shared
@@ -788,11 +815,10 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
// - the "weak" memory instruction we are currently lowering to, and
// - some other instruction that preserves the side-effect, e.g.,
// a dead dummy volatile load.
-
if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL ||
CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT ||
CodeAddrSpace == NVPTX::PTXLdStInstCode::PARAM) {
- return NVPTX::PTXLdStInstCode::NotAtomic;
+ return NVPTX::Ordering::NotAtomic;
}
// [2]: Atomics with Ordering different than Unordered or Relaxed are not
@@ -801,12 +827,11 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
Ordering == AtomicOrdering::Unordered ||
Ordering == AtomicOrdering::Monotonic) &&
!HasMemoryOrdering) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX does not support \"atomic\" for orderings
diff erent than"
- "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order is: \""
- << toIRString(Ordering) << "\".";
- report_fatal_error(OS.str());
+ report_fatal_error(
+ formatv("PTX does not support \"atomic\" for orderings
diff erent than"
+ "\"NotAtomic\" or \"Monotonic\" for sm_60 or older, but order "
+ "is: \"{}\".",
+ toIRString(Ordering)));
}
// [3]: TODO: these should eventually use .mmio<.atomic sem>; for now we drop
@@ -820,68 +845,76 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
(CodeAddrSpace == NVPTX::PTXLdStInstCode::GENERIC ||
CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL ||
CodeAddrSpace == NVPTX::PTXLdStInstCode::SHARED);
+ if (!AddrGenericOrGlobalOrShared)
+ return NVPTX::Ordering::NotAtomic;
+
bool UseRelaxedMMIO =
HasRelaxedMMIO && CodeAddrSpace == NVPTX::PTXLdStInstCode::GLOBAL;
switch (Ordering) {
case AtomicOrdering::NotAtomic:
- return N->isVolatile() && AddrGenericOrGlobalOrShared
- ? NVPTX::PTXLdStInstCode::Volatile
- : NVPTX::PTXLdStInstCode::NotAtomic;
+ return N->isVolatile() ? NVPTX::Ordering::Volatile
+ : NVPTX::Ordering::NotAtomic;
case AtomicOrdering::Unordered:
// We lower unordered in the exact same way as 'monotonic' to respect
// LLVM IR atomicity requirements.
case AtomicOrdering::Monotonic:
if (N->isVolatile())
- return UseRelaxedMMIO ? NVPTX::PTXLdStInstCode::RelaxedMMIO
- : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
- : NVPTX::PTXLdStInstCode::NotAtomic;
+ return UseRelaxedMMIO ? NVPTX::Ordering::RelaxedMMIO
+ : NVPTX::Ordering::Volatile;
else
- return HasMemoryOrdering ? NVPTX::PTXLdStInstCode::Relaxed
- : AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Volatile
- : NVPTX::PTXLdStInstCode::NotAtomic;
+ return HasMemoryOrdering ? NVPTX::Ordering::Relaxed
+ : NVPTX::Ordering::Volatile;
+ // case AtomicOrdering::Consume: // If LLVM ever provides this, lower it to
+ // Acquire.
case AtomicOrdering::Acquire:
- if (!N->readMem()) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports Acquire Ordering on reads: "
- << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
- }
- return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Acquire
- : NVPTX::PTXLdStInstCode::NotAtomic;
+ if (!N->readMem())
+ report_fatal_error(
+ formatv("PTX only supports Acquire Ordering on reads: {}",
+ N->getOperationName()));
+ return NVPTX::Ordering::Acquire;
case AtomicOrdering::Release:
- if (!N->writeMem()) {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports Release Ordering on writes: "
- << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
- }
- return AddrGenericOrGlobalOrShared ? NVPTX::PTXLdStInstCode::Release
- : NVPTX::PTXLdStInstCode::NotAtomic;
+ if (!N->writeMem())
+ report_fatal_error(
+ formatv("PTX only supports Release Ordering on writes: {}",
+ N->getOperationName()));
+ return NVPTX::Ordering::Release;
case AtomicOrdering::AcquireRelease: {
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "PTX only supports AcquireRelease Ordering on read-modify-write: "
- << N->getOperationName();
- N->print(OS);
- report_fatal_error(OS.str());
+ report_fatal_error(
+ formatv("NVPTX does not support AcquireRelease Ordering on "
+ "read-modify-write "
+ "yet and PTX does not support it on loads or stores: {}",
+ N->getOperationName()));
+ }
+ case AtomicOrdering::SequentiallyConsistent: {
+ // LLVM-IR SequentiallyConsistent atomics map to a two-instruction PTX
+ // sequence including a "fence.sc.sco" and the memory instruction with an
+ // Ordering that differs from "sc": acq, rel, or acq_rel, depending on
+ // whether the memory operation is a read, write, or read-modify-write.
+ //
+ // This sets the ordering of the fence to SequentiallyConsistent, and
+ // sets the corresponding ordering for the instruction.
+ NVPTX::Ordering InstrOrder;
+ if (N->readMem())
+ InstrOrder = NVPTX::Ordering::Acquire;
+ else if (N->writeMem())
+ InstrOrder = NVPTX::Ordering::Release;
+ else
+ report_fatal_error(
+ formatv("NVPTX does not support SequentiallyConsistent Ordering on "
+ "read-modify-writes yet: {}",
+ N->getOperationName()));
+ return OperationOrderings(InstrOrder,
+ NVPTX::Ordering::SequentiallyConsistent);
}
- case AtomicOrdering::SequentiallyConsistent:
- // TODO: support AcquireRelease and SequentiallyConsistent
- SmallString<256> Msg;
- raw_svector_ostream OS(Msg);
- OS << "NVPTX backend does not support AtomicOrdering \""
- << toIRString(Ordering) << "\" yet.";
- report_fatal_error(OS.str());
}
-
- llvm_unreachable("unexpected unhandled case");
+ report_fatal_error(
+ formatv("NVPTX backend does not support AtomicOrdering \"{}\" yet.",
+ toIRString(Ordering)));
}
+} // namespace
+
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
@@ -924,6 +957,35 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
});
}
+NVPTX::Ordering NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL,
+ SDValue &Chain,
+ MemSDNode *N) {
+ // Some memory instructions - loads, stores, atomics - need an extra fence
+ // instruction. Get the memory order of the instruction, and that of its
+ // fence, if any.
+ auto [InstructionOrdering, FenceOrdering] =
+ getOperationOrderings(N, Subtarget);
+
+ // If a fence is required before the operation, insert it:
+ switch (NVPTX::Ordering(FenceOrdering)) {
+ case NVPTX::Ordering::NotAtomic:
+ break;
+ case NVPTX::Ordering::SequentiallyConsistent: {
+ unsigned Op = Subtarget->hasMemoryOrdering()
+ ? NVPTX::atomic_thread_fence_seq_cst_sys
+ : NVPTX::INT_MEMBAR_SYS;
+ Chain = SDValue(CurDAG->getMachineNode(Op, DL, MVT::Other, Chain), 0);
+ break;
+ }
+ default:
+ report_fatal_error(
+ formatv("Unexpected fence ordering: \"{}\".",
+ OrderingToCString(NVPTX::Ordering(FenceOrdering))));
+ }
+
+ return InstructionOrdering;
+}
+
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
unsigned IID = N->getConstantOperandVal(0);
switch (IID) {
@@ -1070,17 +1132,15 @@ static int getLdStRegType(EVT VT) {
}
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
- SDLoc dl(N);
MemSDNode *LD = cast<MemSDNode>(N);
assert(LD->readMem() && "Expected load");
- LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
- EVT LoadedVT = LD->getMemoryVT();
- SDNode *NVPTXLD = nullptr;
// do not support pre/post inc/dec
+ LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
if (PlainLoad && PlainLoad->isIndexed())
return false;
+ EVT LoadedVT = LD->getMemoryVT();
if (!LoadedVT.isSimple())
return false;
@@ -1089,13 +1149,13 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
-
- // Memory Semantic Setting
- unsigned int CodeMemorySem = getCodeMemorySemantic(LD, Subtarget);
-
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, LD);
+
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
@@ -1105,45 +1165,42 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
MVT SimpleVT = LoadedVT.getSimpleVT();
MVT ScalarVT = SimpleVT.getScalarType();
// Read at least 8 bits (predicates are stored as 8-bit values)
- unsigned fromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
- unsigned int fromType;
+ unsigned FromTypeWidth = std::max(8U, (unsigned)ScalarVT.getSizeInBits());
+ unsigned int FromType;
// Vector Setting
- unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ unsigned VecType = NVPTX::PTXLdStInstCode::Scalar;
if (SimpleVT.isVector()) {
assert((Isv2x16VT(LoadedVT) || LoadedVT == MVT::v4i8) &&
"Unexpected vector type");
// v2f16/v2bf16/v2i16 is loaded using ld.b32
- fromTypeWidth = 32;
+ FromTypeWidth = 32;
}
if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
- fromType = NVPTX::PTXLdStInstCode::Signed;
+ FromType = NVPTX::PTXLdStInstCode::Signed;
else
- fromType = getLdStRegType(ScalarVT);
+ FromType = getLdStRegType(ScalarVT);
// Create the machine instruction DAG
- SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue Addr;
SDValue Offset, Base;
std::optional<unsigned> Opcode;
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
+ SmallVector<SDValue, 12> Ops({getI32Imm(InstructionOrdering, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL)});
+
if (SelectDirectAddr(N1, Addr)) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- Addr,
- Chain};
- NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
+ Ops.append({Addr, Chain});
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
@@ -1151,15 +1208,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- Base,
- Offset,
- Chain};
- NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
+ Ops.append({Base, Offset, Chain});
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
if (PointerSize == 64)
@@ -1173,15 +1222,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- Base,
- Offset,
- Chain};
- NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
+ Ops.append({Base, Offset, Chain});
} else {
if (PointerSize == 64)
Opcode =
@@ -1194,16 +1235,11 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(fromType, dl),
- getI32Imm(fromTypeWidth, dl),
- N1,
- Chain};
- NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
+ Ops.append({N1, Chain});
}
+ SDNode *NVPTXLD =
+ CurDAG->getMachineNode(*Opcode, DL, TargetVT, MVT::Other, Ops);
if (!NVPTXLD)
return false;
@@ -1215,16 +1251,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
}
bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
-
- SDValue Chain = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- SDValue Addr, Offset, Base;
- std::optional<unsigned> Opcode;
- SDLoc DL(N);
- SDNode *LD;
MemSDNode *MemSD = cast<MemSDNode>(N);
EVT LoadedVT = MemSD->getMemoryVT();
-
if (!LoadedVT.isSimple())
return false;
@@ -1233,12 +1261,12 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
return tryLDGLDU(N);
}
-
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
- // Memory Semantic Setting
- unsigned int CodeMemorySem = getCodeMemorySemantic(MemSD, Subtarget);
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, MemSD);
// Vector Setting
MVT SimpleVT = LoadedVT.getSimpleVT();
@@ -1286,6 +1314,16 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
FromTypeWidth = 32;
}
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ std::optional<unsigned> Opcode;
+ SDNode *LD;
+
+ SmallVector<SDValue, 12> Ops({getI32Imm(InstructionOrdering, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL)});
+
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
@@ -1305,14 +1343,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Addr,
- Chain};
- LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
+ Ops.append({Addr, Chain});
} else if (PointerSize == 64
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
@@ -1334,15 +1365,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Base,
- Offset,
- Chain};
- LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
+ Ops.append({Base, Offset, Chain});
} else if (PointerSize == 64
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
@@ -1384,16 +1407,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Base,
- Offset,
- Chain};
-
- LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
+ Ops.append({Base, Offset, Chain});
} else {
if (PointerSize == 64) {
switch (N->getOpcode()) {
@@ -1434,15 +1448,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
if (!Opcode)
return false;
- SDValue Ops[] = {getI32Imm(CodeMemorySem, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL),
- getI32Imm(FromType, DL),
- getI32Imm(FromTypeWidth, DL),
- Op1,
- Chain};
- LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
+ Ops.append({Op1, Chain});
}
+ LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
@@ -1452,8 +1460,6 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
}
bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
-
- SDValue Chain = N->getOperand(0);
SDValue Op1;
MemSDNode *Mem;
bool IsLDG = true;
@@ -1483,12 +1489,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Mem = cast<MemSDNode>(N);
}
- std::optional<unsigned> Opcode;
- SDLoc DL(N);
- SDNode *LD;
- SDValue Base, Offset, Addr;
EVT OrigType = N->getValueType(0);
-
EVT EltVT = Mem->getMemoryVT();
unsigned NumElts = 1;
if (EltVT.isVector()) {
@@ -1517,6 +1518,12 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
}
InstVTs.push_back(MVT::Other);
SDVTList InstVTList = CurDAG->getVTList(InstVTs);
+ SDValue Chain = N->getOperand(0);
+
+ std::optional<unsigned> Opcode;
+ SDLoc DL(N);
+ SDNode *LD;
+ SDValue Base, Offset, Addr;
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
@@ -1867,19 +1874,17 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
}
bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
- SDLoc dl(N);
MemSDNode *ST = cast<MemSDNode>(N);
assert(ST->writeMem() && "Expected store");
StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
assert((PlainStore || AtomicStore) && "Expected store");
- EVT StoreVT = ST->getMemoryVT();
- SDNode *NVPTXST = nullptr;
// do not support pre/post inc/dec
if (PlainStore && PlainStore->isIndexed())
return false;
+ EVT StoreVT = ST->getMemoryVT();
if (!StoreVT.isSimple())
return false;
@@ -1888,29 +1893,28 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
- // Memory Semantic Setting
- unsigned int CodeMemorySem = getCodeMemorySemantic(ST, Subtarget);
+ SDLoc DL(N);
+ SDValue Chain = ST->getChain();
+ auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, ST);
// Vector Setting
MVT SimpleVT = StoreVT.getSimpleVT();
- unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ unsigned VecType = NVPTX::PTXLdStInstCode::Scalar;
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
- //
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ unsigned ToTypeWidth = ScalarVT.getSizeInBits();
if (SimpleVT.isVector()) {
assert((Isv2x16VT(StoreVT) || StoreVT == MVT::v4i8) &&
"Unexpected vector type");
// v2x16 is stored using st.b32
- toTypeWidth = 32;
+ ToTypeWidth = 32;
}
- unsigned int toType = getLdStRegType(ScalarVT);
+ unsigned int ToType = getLdStRegType(ScalarVT);
// Create the machine instruction DAG
- SDValue Chain = ST->getChain();
SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
SDValue BasePtr = ST->getBasePtr();
SDValue Addr;
@@ -1919,21 +1923,18 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
MVT::SimpleValueType SourceVT =
Value.getNode()->getSimpleValueType(0).SimpleTy;
+ SmallVector<SDValue, 12> Ops({Value, getI32Imm(InstructionOrdering, DL),
+ getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(ToType, DL),
+ getI32Imm(ToTypeWidth, DL)});
+
if (SelectDirectAddr(BasePtr, Addr)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
if (!Opcode)
return false;
- SDValue Ops[] = {Value,
- getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(toType, dl),
- getI32Imm(toTypeWidth, dl),
- Addr,
- Chain};
- NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
+ Ops.append({Addr, Chain});
} else if (PointerSize == 64
? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
: SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
@@ -1942,16 +1943,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
if (!Opcode)
return false;
- SDValue Ops[] = {Value,
- getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(toType, dl),
- getI32Imm(toTypeWidth, dl),
- Base,
- Offset,
- Chain};
- NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
+ Ops.append({Base, Offset, Chain});
} else if (PointerSize == 64
? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
: SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
@@ -1966,17 +1958,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
if (!Opcode)
return false;
-
- SDValue Ops[] = {Value,
- getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(toType, dl),
- getI32Imm(toTypeWidth, dl),
- Base,
- Offset,
- Chain};
- NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
+ Ops.append({Base, Offset, Chain});
} else {
if (PointerSize == 64)
Opcode =
@@ -1989,17 +1971,12 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
if (!Opcode)
return false;
- SDValue Ops[] = {Value,
- getI32Imm(CodeMemorySem, dl),
- getI32Imm(CodeAddrSpace, dl),
- getI32Imm(vecType, dl),
- getI32Imm(toType, dl),
- getI32Imm(toTypeWidth, dl),
- BasePtr,
- Chain};
- NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
+ Ops.append({BasePtr, Chain});
}
+ SDNode *NVPTXST =
+ CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
+
if (!NVPTXST)
return false;
@@ -2010,11 +1987,9 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
}
bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
- SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
std::optional<unsigned> Opcode;
- SDLoc DL(N);
SDNode *ST;
EVT EltVT = Op1.getValueType();
MemSDNode *MemSD = cast<MemSDNode>(N);
@@ -2029,8 +2004,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
unsigned int PointerSize =
CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
- // Memory Semantic Setting
- unsigned int CodeMemorySem = getCodeMemorySemantic(MemSD, Subtarget);
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, MemSD);
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
@@ -2039,23 +2015,20 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
unsigned ToTypeWidth = ScalarVT.getSizeInBits();
unsigned ToType = getLdStRegType(ScalarVT);
- SmallVector<SDValue, 12> StOps;
+ SmallVector<SDValue, 12> Ops;
SDValue N2;
unsigned VecType;
switch (N->getOpcode()) {
case NVPTXISD::StoreV2:
VecType = NVPTX::PTXLdStInstCode::V2;
- StOps.push_back(N->getOperand(1));
- StOps.push_back(N->getOperand(2));
+ Ops.append({N->getOperand(1), N->getOperand(2)});
N2 = N->getOperand(3);
break;
case NVPTXISD::StoreV4:
VecType = NVPTX::PTXLdStInstCode::V4;
- StOps.push_back(N->getOperand(1));
- StOps.push_back(N->getOperand(2));
- StOps.push_back(N->getOperand(3));
- StOps.push_back(N->getOperand(4));
+ Ops.append({N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ N->getOperand(4)});
N2 = N->getOperand(5);
break;
default:
@@ -2072,11 +2045,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ToTypeWidth = 32;
}
- StOps.push_back(getI32Imm(CodeMemorySem, DL));
- StOps.push_back(getI32Imm(CodeAddrSpace, DL));
- StOps.push_back(getI32Imm(VecType, DL));
- StOps.push_back(getI32Imm(ToType, DL));
- StOps.push_back(getI32Imm(ToTypeWidth, DL));
+ Ops.append({getI32Imm(InstructionOrdering, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(ToType, DL),
+ getI32Imm(ToTypeWidth, DL)});
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
@@ -2095,7 +2066,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
NVPTX::STV_f32_v4_avar, std::nullopt);
break;
}
- StOps.push_back(Addr);
+ Ops.push_back(Addr);
} else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
: SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
@@ -2114,8 +2085,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
break;
}
- StOps.push_back(Base);
- StOps.push_back(Offset);
+ Ops.append({Base, Offset});
} else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
: SelectADDRri(N2.getNode(), N2, Base, Offset)) {
if (PointerSize == 64) {
@@ -2154,8 +2124,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
break;
}
}
- StOps.push_back(Base);
- StOps.push_back(Offset);
+ Ops.append({Base, Offset});
} else {
if (PointerSize == 64) {
switch (N->getOpcode()) {
@@ -2194,15 +2163,15 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
break;
}
}
- StOps.push_back(N2);
+ Ops.push_back(N2);
}
if (!Opcode)
return false;
- StOps.push_back(Chain);
+ Ops.push_back(Chain);
- ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);
+ ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
@@ -2276,10 +2245,8 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
unsigned OffsetVal = Offset->getAsZExtVal();
- SmallVector<SDValue, 2> Ops;
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
- Ops.push_back(Chain);
- Ops.push_back(Glue);
+ SmallVector<SDValue, 2> Ops(
+ {CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain, Glue});
ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
return true;
@@ -2312,8 +2279,7 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
SmallVector<SDValue, 6> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 2));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
- Ops.push_back(Chain);
+ Ops.append({CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain});
// Determine target opcode
// If we have an i1, use an 8-bit store. The lowering code in
@@ -2493,10 +2459,8 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 3));
- Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
- Ops.push_back(Chain);
- Ops.push_back(Glue);
+ Ops.append({CurDAG->getTargetConstant(ParamVal, DL, MVT::i32),
+ CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain, Glue});
// Determine target opcode
// If we have an i1, use an 8-bit store. The lowering code in
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 49626d40514859..eac4056599511c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -99,6 +99,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, LoadSDNode *N);
+
+ NVPTX::Ordering insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
+ MemSDNode *N);
};
class NVPTXDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index a5bdc6fac3ca6c..6a096fa5acea7c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3926,7 +3926,6 @@ def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, /
def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
Requires<[hasPTX<60>, hasSM<70>]>;
-
// If PTX<60 or SM<70, we fall back to MEMBAR:
def : Pat<(atomic_fence (i64 4), (i64 1)), (INT_MEMBAR_SYS)>; // acquire(4) sys(1)
def : Pat<(atomic_fence (i64 5), (i64 1)), (INT_MEMBAR_SYS)>; // release(5) sys(1)
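For reference, a bare system-scope seq_cst fence selects differently by
target under these patterns. A minimal sketch in LLVM IR (expected PTX
in comments; this assumes INT_MEMBAR_SYS emits PTX membar.sys, the
pre-sm_70 fallback):

  ; sm_70+ with PTX 6.0+: fence.sc.sys
  ; older targets:        membar.sys
  fence seq_cst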
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index c15ff6cae1f270..eebd91fefe4f03 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
+#include "NVPTX.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -82,6 +83,36 @@ inline unsigned promoteScalarArgumentSize(unsigned size) {
bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM);
bool Isv2x16VT(EVT VT);
+
+namespace NVPTX {
+
+inline std::string OrderingToCString(Ordering Order) {
+ switch (Order) {
+ case Ordering::NotAtomic:
+ return "NotAtomic";
+ case Ordering::Relaxed:
+ return "Relaxed";
+ case Ordering::Acquire:
+ return "Acquire";
+ case Ordering::Release:
+ return "Release";
+ // case Ordering::AcquireRelease: return "AcquireRelease";
+ case Ordering::SequentiallyConsistent:
+ return "SequentiallyConsistent";
+ case Ordering::Volatile:
+ return "Volatile";
+ case Ordering::RelaxedMMIO:
+ return "RelaxedMMIO";
+ }
+ report_fatal_error("unknown ordering");
+}
+
+inline raw_ostream &operator<<(raw_ostream &O, Ordering Order) {
+ O << OrderingToCString(Order);
+ return O;
}
+} // namespace NVPTX
+} // namespace llvm
+
#endif
diff --git a/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll b/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
index 68915b0f2698bd..9cea33d12027f2 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
@@ -1,169 +1,7 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s
; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
-; CHECK-LABEL: generic_plain
-define void @generic_plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr %a
-
- ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr %b
-
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr %c
-
- ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr %d
-
- ; CHECK: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr %c
-
- ; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr %c
-
- ret void
-}
-
-; CHECK-LABEL: generic_volatile
-define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr %a
-
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr %b
-
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr %c
-
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr %d
-
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr %c
-
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr %c
-
- ret void
-}
-
-; CHECK-LABEL: generic_unordered
-define void @generic_unordered(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr %a unordered, align 1
-
- ; CHECK: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr %b unordered, align 2
-
- ; CHECK: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr %c unordered, align 4
-
- ; CHECK: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr %d unordered, align 8
-
- ; CHECK: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr %e unordered, align 4
-
- ; CHECK: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr %e unordered, align 8
-
- ret void
-}
-
-; CHECK-LABEL: generic_monotonic
-define void @generic_monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr %a monotonic, align 1
-
- ; CHECK: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr %b monotonic, align 2
-
- ; CHECK: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr %c monotonic, align 4
-
- ; CHECK: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr %d monotonic, align 8
-
- ; CHECK: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr %e monotonic, align 4
-
- ; CHECK: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr %e monotonic, align 8
-
- ret void
-}
+;; generic statespace
; CHECK-LABEL: generic_acq_rel
define void @generic_acq_rel(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
@@ -206,335 +44,154 @@ define void @generic_acq_rel(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnam
ret void
}
-; CHECK-LABEL: generic_unordered_volatile
-define void @generic_unordered_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr %a unordered, align 1
-
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr %b unordered, align 2
-
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr %c unordered, align 4
-
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr %d unordered, align 8
-
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr %e unordered, align 4
-
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr %e unordered, align 8
-
- ret void
-}
-
-; CHECK-LABEL: generic_monotonic_volatile
-define void @generic_monotonic_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr %a monotonic, align 1
-
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr %b monotonic, align 2
-
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr %c monotonic, align 4
-
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr %d monotonic, align 8
-
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr %e monotonic, align 4
-
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr %e monotonic, align 8
-
- ret void
-}
-
-;; global statespace
-
-; CHECK-LABEL: global_plain
-define void @global_plain(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr addrspace(1) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr addrspace(1) %a
-
- ; CHECK: ld.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr addrspace(1) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr addrspace(1) %b
-
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr addrspace(1) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr addrspace(1) %c
-
- ; CHECK: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr addrspace(1) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr addrspace(1) %d
-
- ; CHECK: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr addrspace(1) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr addrspace(1) %c
-
- ; CHECK: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr addrspace(1) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr addrspace(1) %c
-
- ret void
-}
-
-; CHECK-LABEL: global_volatile
-define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr addrspace(1) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr addrspace(1) %a
-
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr addrspace(1) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr addrspace(1) %b
-
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr addrspace(1) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr addrspace(1) %c
-
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr addrspace(1) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr addrspace(1) %d
-
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr addrspace(1) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr addrspace(1) %c
-
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr addrspace(1) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr addrspace(1) %c
-
- ret void
-}
-
-; CHECK-LABEL: global_unordered
-define void @global_unordered(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
+; CHECK-LABEL: generic_acq_rel_volatile
+define void @generic_acq_rel_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr %a acquire, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
+ ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr %a release, align 1
- ; CHECK: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr %b acquire, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr %b release, align 2
- ; CHECK: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr %c acquire, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr %c release, align 4
- ; CHECK: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr %d acquire, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr %d release, align 8
- ; CHECK: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr %e acquire, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr %e release, align 4
- ; CHECK: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr %e acquire, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr %e release, align 8
ret void
}
-; CHECK-LABEL: global_monotonic
-define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
+; CHECK-LABEL: generic_sc
+define void @generic_sc(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr %a seq_cst, align 1
- ; CHECK: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr %b seq_cst, align 2
- ; CHECK: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr %c seq_cst, align 4
- ; CHECK: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr %d seq_cst, align 8
- ; CHECK: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr %e seq_cst, align 4
- ; CHECK: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: global_unordered_volatile
-define void @global_unordered_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
+; CHECK-LABEL: generic_sc_volatile
+define void @generic_sc_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr %a seq_cst, align 1
- ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr %b seq_cst, align 2
- ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr %c seq_cst, align 4
- ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr %d seq_cst, align 8
- ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr %e seq_cst, align 4
- ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: global_monotonic_volatile
-define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
-
- ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
-
- ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
-
- ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
-
- ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
-
- ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
-
- ret void
-}
+;; global statespace
; CHECK-LABEL: global_acq_rel
define void @global_acq_rel(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
@@ -618,253 +275,113 @@ define void @global_acq_rel_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, p
ret void
}
-;; shared statespace
-
-; CHECK-LABEL: shared_plain
-define void @shared_plain(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr addrspace(3) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr addrspace(3) %a
-
- ; CHECK: ld.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr addrspace(3) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr addrspace(3) %b
-
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr addrspace(3) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr addrspace(3) %c
-
- ; CHECK: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr addrspace(3) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr addrspace(3) %d
-
- ; CHECK: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr addrspace(3) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr addrspace(3) %c
-
- ; CHECK: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr addrspace(3) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr addrspace(3) %c
-
- ret void
-}
-
-; CHECK-LABEL: shared_volatile
-define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr addrspace(3) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr addrspace(3) %a
-
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr addrspace(3) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr addrspace(3) %b
-
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr addrspace(3) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr addrspace(3) %c
-
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr addrspace(3) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr addrspace(3) %d
-
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr addrspace(3) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr addrspace(3) %c
-
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr addrspace(3) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr addrspace(3) %c
-
- ret void
-}
-
-; CHECK-LABEL: shared_unordered
-define void @shared_unordered(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
-
- ; CHECK: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(3) %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(3) %b unordered, align 2
-
- ; CHECK: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(3) %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(3) %c unordered, align 4
-
- ; CHECK: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(3) %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(3) %d unordered, align 8
-
- ; CHECK: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(3) %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(3) %e unordered, align 4
-
- ; CHECK: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(3) %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(3) %e unordered, align 8
-
- ret void
-}
-
-; CHECK-LABEL: shared_unordered_volatile
-define void @shared_unordered_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1
+; CHECK-LABEL: global_seq_cst
+define void @global_seq_cst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(1) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(1) %a seq_cst, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(3) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(1) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(3) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(1) %b seq_cst, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(3) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(1) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(3) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(1) %c seq_cst, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(3) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(1) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(3) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(1) %d seq_cst, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(3) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(1) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(3) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(1) %e seq_cst, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(3) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(1) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(3) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(1) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic
-define void @shared_monotonic(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
+; CHECK-LABEL: global_seq_cst_volatile
+define void @global_seq_cst_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(1) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(1) %a seq_cst, align 1
- ; CHECK: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(1) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(1) %b seq_cst, align 2
- ; CHECK: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(1) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(1) %c seq_cst, align 4
- ; CHECK: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(1) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(1) %d seq_cst, align 8
- ; CHECK: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(1) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(1) %e seq_cst, align 4
- ; CHECK: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(1) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(1) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic_volatile
-define void @shared_monotonic_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
-
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(3) %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(3) %b monotonic, align 2
-
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(3) %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(3) %c monotonic, align 4
-
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(3) %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(3) %d monotonic, align 8
-
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(3) %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(3) %e monotonic, align 4
-
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(3) %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(3) %e monotonic, align 8
-
- ret void
-}
+;; shared statespace
; CHECK-LABEL: shared_acq_rel
define void @shared_acq_rel(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
@@ -948,332 +465,291 @@ define void @shared_acq_rel_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, p
ret void
}
-;; local statespace
-
-; CHECK-LABEL: local_plain
-define void @local_plain(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr addrspace(5) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr addrspace(5) %a
-
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr addrspace(5) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr addrspace(5) %b
-
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr addrspace(5) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr addrspace(5) %c
-
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr addrspace(5) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr addrspace(5) %d
-
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr addrspace(5) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr addrspace(5) %c
-
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr addrspace(5) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr addrspace(5) %c
-
- ret void
-}
-
-; CHECK-LABEL: local_volatile
-define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr addrspace(5) %a
+; CHECK-LABEL: shared_seq_cst
+define void @shared_seq_cst(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(3) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr addrspace(5) %a
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(3) %a seq_cst, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr addrspace(5) %b
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(3) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr addrspace(5) %b
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(3) %b seq_cst, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(3) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(3) %c seq_cst, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr addrspace(5) %d
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(3) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr addrspace(5) %d
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(3) %d seq_cst, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr addrspace(5) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(3) %e seq_cst, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(3) %e seq_cst, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr addrspace(5) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(3) %e seq_cst, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(3) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: local_unordered
-define void @local_unordered(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1
+; CHECK-LABEL: shared_seq_cst_volatile
+define void @shared_seq_cst_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(3) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(3) %a seq_cst, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(5) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(3) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(5) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(3) %b seq_cst, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(5) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(3) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(5) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(3) %c seq_cst, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(5) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(3) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(5) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(3) %d seq_cst, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(5) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(3) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(5) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(3) %e seq_cst, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(5) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(3) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(5) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(3) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: local_unordered_volatile
-define void @local_unordered_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1
-
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(5) %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(5) %b unordered, align 2
-
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(5) %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(5) %c unordered, align 4
-
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(5) %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(5) %d unordered, align 8
-
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(5) %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(5) %e unordered, align 4
-
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(5) %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(5) %e unordered, align 8
+;; local statespace
- ret void
-}
+; CHECK-LABEL: local_acq_rel
+define void @local_acq_rel(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
-; CHECK-LABEL: local_monotonic
-define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
+ %a.load = load atomic i8, ptr addrspace(5) %a acquire, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+ store atomic i8 %a.add, ptr addrspace(5) %a release, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(5) %b monotonic, align 2
+ %b.load = load atomic i16, ptr addrspace(5) %b acquire, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+ store atomic i16 %b.add, ptr addrspace(5) %b release, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(5) %c monotonic, align 4
+ %c.load = load atomic i32, ptr addrspace(5) %c acquire, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+ store atomic i32 %c.add, ptr addrspace(5) %c release, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(5) %d monotonic, align 8
+ %d.load = load atomic i64, ptr addrspace(5) %d acquire, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+ store atomic i64 %d.add, ptr addrspace(5) %d release, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(5) %e monotonic, align 4
+ %e.load = load atomic float, ptr addrspace(5) %e acquire, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(5) %e monotonic, align 4
+ store atomic float %e.add, ptr addrspace(5) %e release, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(5) %e monotonic, align 8
+ %f.load = load atomic double, ptr addrspace(5) %e acquire, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(5) %e monotonic, align 8
+ store atomic double %f.add, ptr addrspace(5) %e release, align 8
ret void
}
-; CHECK-LABEL: local_monotonic_volatile
-define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+; CHECK-LABEL: local_acq_rel_volatile
+define void @local_acq_rel_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
+ %a.load = load atomic volatile i8, ptr addrspace(5) %a acquire, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+ store atomic volatile i8 %a.add, ptr addrspace(5) %a release, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(5) %b monotonic, align 2
+ %b.load = load atomic volatile i16, ptr addrspace(5) %b acquire, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+ store atomic volatile i16 %b.add, ptr addrspace(5) %b release, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(5) %c monotonic, align 4
+ %c.load = load atomic volatile i32, ptr addrspace(5) %c acquire, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+ store atomic volatile i32 %c.add, ptr addrspace(5) %c release, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(5) %d monotonic, align 8
+ %d.load = load atomic volatile i64, ptr addrspace(5) %d acquire, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+ store atomic volatile i64 %d.add, ptr addrspace(5) %d release, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(5) %e monotonic, align 4
+ %e.load = load atomic volatile float, ptr addrspace(5) %e acquire, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(5) %e monotonic, align 4
+ store atomic volatile float %e.add, ptr addrspace(5) %e release, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(5) %e monotonic, align 8
+ %f.load = load atomic volatile double, ptr addrspace(5) %e acquire, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(5) %e monotonic, align 8
+ store atomic volatile double %f.add, ptr addrspace(5) %e release, align 8
ret void
}
-; CHECK-LABEL: local_acq_rel
-define void @local_acq_rel(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+; CHECK-LABEL: local_seq_cst
+define void @local_seq_cst(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(5) %a acquire, align 1
+ %a.load = load atomic i8, ptr addrspace(5) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(5) %a release, align 1
+ store atomic i8 %a.add, ptr addrspace(5) %a seq_cst, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(5) %b acquire, align 2
+ %b.load = load atomic i16, ptr addrspace(5) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(5) %b release, align 2
+ store atomic i16 %b.add, ptr addrspace(5) %b seq_cst, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(5) %c acquire, align 4
+ %c.load = load atomic i32, ptr addrspace(5) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(5) %c release, align 4
+ store atomic i32 %c.add, ptr addrspace(5) %c seq_cst, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(5) %d acquire, align 8
+ %d.load = load atomic i64, ptr addrspace(5) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(5) %d release, align 8
+ store atomic i64 %d.add, ptr addrspace(5) %d seq_cst, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(5) %e acquire, align 4
+ %e.load = load atomic float, ptr addrspace(5) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(5) %e release, align 4
+ store atomic float %e.add, ptr addrspace(5) %e seq_cst, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(5) %e acquire, align 8
+ %f.load = load atomic double, ptr addrspace(5) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(5) %e release, align 8
+ store atomic double %f.add, ptr addrspace(5) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: local_acq_rel_volatile
-define void @local_acq_rel_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+; CHECK-LABEL: local_seq_cst_volatile
+define void @local_seq_cst_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(5) %a acquire, align 1
+ %a.load = load atomic volatile i8, ptr addrspace(5) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(5) %a release, align 1
+ store atomic volatile i8 %a.add, ptr addrspace(5) %a seq_cst, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(5) %b acquire, align 2
+ %b.load = load atomic volatile i16, ptr addrspace(5) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(5) %b release, align 2
+ store atomic volatile i16 %b.add, ptr addrspace(5) %b seq_cst, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(5) %c acquire, align 4
+ %c.load = load atomic volatile i32, ptr addrspace(5) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(5) %c release, align 4
+ store atomic volatile i32 %c.add, ptr addrspace(5) %c seq_cst, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(5) %d acquire, align 8
+ %d.load = load atomic volatile i64, ptr addrspace(5) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(5) %d release, align 8
+ store atomic volatile i64 %d.add, ptr addrspace(5) %d seq_cst, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(5) %e acquire, align 4
+ %e.load = load atomic volatile float, ptr addrspace(5) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(5) %e release, align 4
+ store atomic volatile float %e.add, ptr addrspace(5) %e seq_cst, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(5) %e acquire, align 8
+ %f.load = load atomic volatile double, ptr addrspace(5) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(5) %e release, align 8
+ store atomic volatile double %f.add, ptr addrspace(5) %e seq_cst, align 8
+
+ ; TODO: LLVM IR Verifier does not support atomics on vector types.
ret void
}
+
+; TODO: add plain, atomic, volatile, and atomic volatile tests
+; for the .const and .param statespaces.
\ No newline at end of file
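
For the .const statespace named in the TODO above, a minimal sketch of the
shape such a test could take, assuming NVPTX's usual mapping of addrspace(4)
to .const (the function name and CHECK line are illustrative, not verified
output from this patch):

  ; CHECK-LABEL: const_plain
  ; CHECK: ld.const.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
  define i32 @const_plain(ptr addrspace(4) %a) local_unnamed_addr {
    %a.load = load i32, ptr addrspace(4) %a
    ret i32 %a.load
  }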
diff --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index 4c5e0920ce1aeb..aac73f71a6766c 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -1,5 +1,13 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
+; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
+
+; TODO: add i1, <8 x i8>, and <6 x i8> vector tests.
+
+; TODO: add tests for vectors that exceed 128 bits in length.
+; Per https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#vectors
+; vectors cannot exceed 128 bits in length, i.e., .v4.u64 is not allowed.
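
A sketch of what such a test could check, assuming type legalization splits an
oversized vector access into 128-bit halves (the %w names and CHECK lines are
illustrative, not output from this patch):

  ; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
  ; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}+16]
  %w.load = load <4 x i64>, ptr %d
  %w.add = add <4 x i64> %w.load, <i64 1, i64 1, i64 1, i64 1>
  ; CHECK: st.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
  ; CHECK: st.v2.u64 [%rd{{[0-9]+}}+16], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
  store <4 x i64> %w.add, ptr %d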
; generic statespace
@@ -36,10 +44,76 @@ define void @generic_plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
store float %e.add, ptr %c
; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr %c
+ %f.load = load double, ptr %d
%f.add = fadd double %f.load, 1.
; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr %c
+ store double %f.add, ptr %d
+
+ ; TODO: make the lowering of these weak vector ops consistent with
+ ; that of the next test. This test lowers to a weak PTX
+ ; vector op, but the next test lowers to a scalar PTX op.
+ ; CHECK: ld.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr %b
+
+ ; TODO: make the lowering of these weak vector ops consistent with
+ ; that of the previous test. This test lowers to a weak
+ ; PTX scalar op, but the prior test lowers to a vector PTX op.
+ ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr %c
+
+ ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr %c
+
+ ; CHECK: ld.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr %d
+
+ ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr %d
+
+ ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr %d
+
+ ; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr %d
+
+ ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr %d
+
+ ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr %d
+
+ ; CHECK: ld.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr %d
ret void
}
@@ -82,45 +156,136 @@ define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr
; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr %c
+ ; TODO: volatile, atomic, and volatile atomic memory operations on vector types.
+ ; Currently, LLVM:
+ ; - does not allow atomic operations on vectors.
+ ; - allows volatile operations on vectors, but it is unclear what semantics they have.
+ ; Both of the following semantics make sense in general, and PTX supports both:
+ ; - volatile/atomic/volatile atomic applies to the whole vector
+ ; - volatile/atomic/volatile atomic applies elementwise
+ ; Actions required:
+ ; - clarify the LLVM semantics of volatile on vectors and align the NVPTX backend with them.
+ ;   The tests below show that the current implementation picks between the two inconsistently:
+ ;   * volatile <2 x i8> lowers to "elementwise volatile"
+ ;   * volatile <4 x i8> lowers to "full vector volatile"
+ ; - provide support for vector atomics, e.g., by extending LLVM IR or via intrinsics.
+ ; - update the tests in load-store-sm-70.ll as well.
+
+ ; TODO: make this operation consistent with the one for <4 x i8>.
+ ; This operation lowers to an "elementwise volatile PTX operation".
+ ; CHECK: ld.volatile.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.volatile.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <2 x i8> %h.add, ptr %b
+
+ ; TODO: make this operation consistent with the one for <2 x i8>.
+ ; This operation lowers to a "full vector volatile PTX operation".
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <4 x i8> %i.add, ptr %c
+
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <2 x i16> %j.add, ptr %c
+
+ ; CHECK: ld.volatile.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.volatile.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <4 x i16> %k.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.volatile.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <2 x i32> %l.add, ptr %d
+
+ ; CHECK: ld.volatile.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.volatile.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <4 x i32> %m.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.volatile.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile <2 x i64> %n.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.volatile.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <2 x float> %o.add, ptr %d
+
+ ; CHECK: ld.volatile.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.volatile.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <4 x float> %p.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.volatile.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile <2 x double> %q.add, ptr %d
+
ret void
}
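
To make the two semantics in the TODO above concrete, here is the pair of
volatile vector loads just tested, annotated with the semantics each one
currently receives (the IR and PTX are from the tests above; only the
annotations are new):

  %h.load = load volatile <2 x i8>, ptr %b  ; elementwise: ld.volatile.v2.u8
  %i.load = load volatile <4 x i8>, ptr %c  ; whole vector: ld.volatile.u32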
; CHECK-LABEL: generic_monotonic
define void @generic_monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a monotonic, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b monotonic, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c monotonic, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d monotonic, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e monotonic, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e monotonic, align 8
ret void
@@ -169,40 +334,52 @@ define void @generic_monotonic_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e)
; CHECK-LABEL: generic_unordered
define void @generic_unordered(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a unordered, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b unordered, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c unordered, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d unordered, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e unordered, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e unordered, align 8
ret void
@@ -289,6 +466,66 @@ define void @global_plain(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspac
; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(1) %c
+ ; CHECK: ld.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr addrspace(1) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr addrspace(1) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr addrspace(1) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr addrspace(1) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr addrspace(1) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr addrspace(1) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr addrspace(1) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr addrspace(1) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr addrspace(1) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr addrspace(1) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr addrspace(1) %d
+
ret void
}
@@ -330,45 +567,117 @@ define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrs
; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(1) %c
+ ; CHECK: ld.volatile.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr addrspace(1) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.volatile.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <2 x i8> %h.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr addrspace(1) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <4 x i8> %i.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr addrspace(1) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <2 x i16> %j.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr addrspace(1) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.volatile.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <4 x i16> %k.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr addrspace(1) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.volatile.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <2 x i32> %l.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr addrspace(1) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.volatile.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <4 x i32> %m.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr addrspace(1) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.volatile.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile <2 x i64> %n.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr addrspace(1) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.volatile.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <2 x float> %o.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr addrspace(1) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.volatile.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <4 x float> %p.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr addrspace(1) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.volatile.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile <2 x double> %q.add, ptr addrspace(1) %d
+
ret void
}
; CHECK-LABEL: global_monotonic
define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
@@ -376,40 +685,52 @@ define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addr
; CHECK-LABEL: global_monotonic_volatile
define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
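+ ; Volatile monotonic/unordered atomics additionally take the .mmio
+ ; qualifier on sm_70+ (.mmio.relaxed.sys), while sm_60 still lowers
+ ; them to .volatile, as the checks in this function show.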
%a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
@@ -417,40 +738,52 @@ define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b,
; CHECK-LABEL: global_unordered
define void @global_unordered(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e unordered, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
@@ -458,40 +791,52 @@ define void @global_unordered(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addr
; CHECK-LABEL: global_unordered_volatile
define void @global_unordered_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e unordered, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
@@ -537,6 +882,66 @@ define void @shared_plain(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspac
; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(3) %c
+ ; CHECK: ld.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr addrspace(3) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr addrspace(3) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr addrspace(3) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr addrspace(3) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr addrspace(3) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr addrspace(3) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr addrspace(3) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr addrspace(3) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr addrspace(3) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr addrspace(3) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr addrspace(3) %d
+
ret void
}
@@ -578,45 +983,119 @@ define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrs
; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(3) %c
+ ; CHECK: ld.volatile.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr addrspace(3) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.volatile.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <2 x i8> %h.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr addrspace(3) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <4 x i8> %i.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr addrspace(3) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <2 x i16> %j.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr addrspace(3) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.volatile.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <4 x i16> %k.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr addrspace(3) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.volatile.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <2 x i32> %l.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr addrspace(3) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.volatile.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <4 x i32> %m.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr addrspace(3) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.volatile.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile <2 x i64> %n.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr addrspace(3) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.volatile.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <2 x float> %o.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr addrspace(3) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.volatile.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <4 x float> %p.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr addrspace(3) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.volatile.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile <2 x double> %q.add, ptr addrspace(3) %d
+
ret void
}
; CHECK-LABEL: shared_monotonic
define void @shared_monotonic(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; TODO: optimize .sys.shared to .cta.shared or .cluster.shared.
+
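+ ; A scope-optimized lowering could emit a narrower scope instead, for
+ ; example (hypothetical PTX; this optimization is not implemented yet):
+ ;   ld.relaxed.cta.shared.u8 %rs1, [%rd1];
+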
+ ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
ret void
@@ -665,40 +1144,54 @@ define void @shared_monotonic_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b,
; CHECK-LABEL: shared_unordered
define void @shared_unordered(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; TODO: optimize .sys.shared to .cta.shared or .cluster.shared.
+
+ ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b unordered, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c unordered, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d unordered, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e unordered, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e unordered, align 8
ret void
@@ -785,11 +1278,74 @@ define void @local_plain(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(5) %c
+ ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr addrspace(5) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr addrspace(5) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr addrspace(5) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr addrspace(5) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr addrspace(5) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr addrspace(5) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr addrspace(5) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr addrspace(5) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr addrspace(5) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr addrspace(5) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr addrspace(5) %d
+
ret void
}
; CHECK-LABEL: local_volatile
define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using volatile operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load volatile i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
@@ -826,11 +1382,74 @@ define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrsp
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(5) %c
+ ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr addrspace(5) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <2 x i8> %h.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr addrspace(5) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <4 x i8> %i.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr addrspace(5) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <2 x i16> %j.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr addrspace(5) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <4 x i16> %k.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr addrspace(5) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <2 x i32> %l.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr addrspace(5) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <4 x i32> %m.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr addrspace(5) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile <2 x i64> %n.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr addrspace(5) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <2 x float> %o.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr addrspace(5) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <4 x float> %p.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr addrspace(5) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile <2 x double> %q.add, ptr addrspace(5) %d
+
ret void
}
; CHECK-LABEL: local_monotonic
define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
@@ -872,6 +1491,9 @@ define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrs
; CHECK-LABEL: local_monotonic_volatile
define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using atomic or volatile operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
@@ -992,3 +1614,6 @@ define void @local_unordered_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b,
ret void
}
+
+; TODO: add plain, atomic, volatile, and atomic volatile tests
+; for the .const and .param statespaces.
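+;
+; A .const test, for instance, could mirror the patterns above (sketch
+; only; in NVPTX, addrspace(4) maps to the read-only .const statespace,
+; so only loads apply):
+;
+;   ; CHECK-LABEL: const_plain
+;   define void @const_plain(ptr addrspace(4) %a) local_unnamed_addr {
+;     ; CHECK: ld.const.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+;     %a.load = load i8, ptr addrspace(4) %a
+;     ret void
+;   }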
\ No newline at end of file