[llvm] [NVPTX] Load/Store/Fence syncscope support (PR #106101)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 26 09:47:48 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-nvptx
Author: None (gonzalobg)
Changes:
Adds "initial" support for `syncscope` to the NVPTX backend `load`/`store`/`fence` instructions.
Atomic Read-Modify-Write operations intentionally not supported as part of this initial PR.
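To illustrate the intent, here is a minimal, hypothetical IR sketch (not taken from the patch's tests) using the scope names registered by the new `NVPTXScopes` mapping ("block", "cluster", "device"); the PTX modifiers mentioned in the comments follow the printer changes in the diff below and are expectations, not verified output:

```llvm
; Hypothetical example of the scoped operations this patch handles.
define i32 @scoped_ops(ptr %a, ptr %b) {
  ; expected to select an ".acquire.cta"-qualified load on targets with memory ordering
  %v = load atomic i32, ptr %a syncscope("block") acquire, align 4
  ; expected to select a ".release.gpu"-qualified store
  store atomic i32 %v, ptr %b syncscope("device") release, align 4
  ; expected to select a cluster-scoped seq_cst fence (cluster support required)
  fence syncscope("cluster") seq_cst
  ret i32 %v
}
```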
---
Patch is 367.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106101.diff
14 Files Affected:
- (modified) llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp (+42-20)
- (modified) llvm/lib/Target/NVPTX/NVPTX.h (+12-2)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+156-22)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (+19-2)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+130-118)
- (modified) llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp (+3-4)
- (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp (+12)
- (modified) llvm/lib/Target/NVPTX/NVPTXSubtarget.h (+3)
- (modified) llvm/lib/Target/NVPTX/NVPTXUtilities.h (+29-4)
- (added) llvm/test/CodeGen/NVPTX/fence-sm-90.ll (+30)
- (modified) llvm/test/CodeGen/NVPTX/fence.ll (+71-5)
- (modified) llvm/test/CodeGen/NVPTX/load-store-sm-70.ll (+3078-298)
- (added) llvm/test/CodeGen/NVPTX/load-store-sm-90.ll (+1423)
- (modified) llvm/test/CodeGen/NVPTX/load-store.ll (+251-256)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 5b568b0487b45a..2a44ce0273ee1b 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -233,46 +233,68 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
auto Ordering = NVPTX::Ordering(Imm);
switch (Ordering) {
case NVPTX::Ordering::NotAtomic:
- break;
- case NVPTX::Ordering::Volatile:
- O << ".volatile";
- break;
+ return;
case NVPTX::Ordering::Relaxed:
- O << ".relaxed.sys";
- break;
+ O << ".relaxed";
+ return;
case NVPTX::Ordering::Acquire:
- O << ".acquire.sys";
- break;
+ O << ".acquire";
+ return;
case NVPTX::Ordering::Release:
- O << ".release.sys";
- break;
+ O << ".release";
+ return;
+ case NVPTX::Ordering::Volatile:
+ O << ".volatile";
+ return;
case NVPTX::Ordering::RelaxedMMIO:
- O << ".mmio.relaxed.sys";
- break;
+ O << ".mmio.relaxed";
+ return;
default:
report_fatal_error(formatv(
- "NVPTX LdStCode Printer does not support \"{}\" sem modifier.",
- OrderingToCString(Ordering)));
+ "NVPTX LdStCode Printer does not support \"{}\" sem modifier. "
+ "Loads/Stores cannot be AcquireRelease or SequentiallyConsistent.",
+ OrderingToString(Ordering)));
+ }
+ } else if (!strcmp(Modifier, "sco")) {
+ auto S = NVPTX::Scope(Imm);
+ switch (S) {
+ case NVPTX::Scope::Thread:
+ return;
+ case NVPTX::Scope::System:
+ O << ".sys";
+ return;
+ case NVPTX::Scope::Block:
+ O << ".cta";
+ return;
+ case NVPTX::Scope::Cluster:
+ O << ".cluster";
+ return;
+ case NVPTX::Scope::Device:
+ O << ".gpu";
+ return;
}
+ report_fatal_error(formatv(
+ "NVPTX LdStCode Printer does not support \"{}\" sco modifier.",
+ ScopeToString(S)));
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
case NVPTX::PTXLdStInstCode::GLOBAL:
O << ".global";
- break;
+ return;
case NVPTX::PTXLdStInstCode::SHARED:
O << ".shared";
- break;
+ return;
case NVPTX::PTXLdStInstCode::LOCAL:
O << ".local";
- break;
+ return;
case NVPTX::PTXLdStInstCode::PARAM:
O << ".param";
- break;
+ return;
case NVPTX::PTXLdStInstCode::CONSTANT:
O << ".const";
- break;
+ return;
case NVPTX::PTXLdStInstCode::GENERIC:
- break;
+ return;
default:
llvm_unreachable("Wrong Address Space");
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index f6f6acb9e13c90..b5624f9212ea27 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -117,12 +117,22 @@ enum Ordering : OrderingUnderlyingType {
// Consume = 3, // Unimplemented in LLVM; NVPTX would map to "Acquire"
Acquire = (OrderingUnderlyingType)AtomicOrdering::Acquire,
Release = (OrderingUnderlyingType)AtomicOrdering::Release,
- // AcquireRelease = 6, // TODO
+ AcquireRelease = (OrderingUnderlyingType)AtomicOrdering::AcquireRelease,
SequentiallyConsistent =
(OrderingUnderlyingType)AtomicOrdering::SequentiallyConsistent,
Volatile = SequentiallyConsistent + 1,
RelaxedMMIO = Volatile + 1,
- LAST = RelaxedMMIO
+ LASTORDERING = RelaxedMMIO
+};
+
+using ScopeUnderlyingType = unsigned int;
+enum Scope : ScopeUnderlyingType {
+ Thread = 0,
+ System = 1,
+ Block = 2,
+ Cluster = 3,
+ Device = 4,
+ LASTSCOPE = Device
};
namespace PTXLdStInstCode {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4f0bc1a2044642..f04796fcdd49fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -59,6 +59,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
+ Scopes = NVPTXScopes(MF.getFunction().getContext());
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -106,6 +107,10 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryStore(N))
return;
break;
+ case ISD::ATOMIC_FENCE:
+ if (tryFence(N))
+ return;
+ break;
case ISD::EXTRACT_VECTOR_ELT:
if (tryEXTRACT_VECTOR_ELEMENT(N))
return;
@@ -915,6 +920,42 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
} // namespace
+NVPTX::Scope NVPTXDAGToDAGISel::getOperationScope(MemSDNode *N,
+ NVPTX::Ordering Ord) const {
+ switch (Ord) {
+ case NVPTX::Ordering::NotAtomic:
+ case NVPTX::Ordering::Volatile: // Non-atomic volatile operations
+ // NVPTX uses Thread scope as the scope of non-atomic operations.
+ return NVPTX::Scope::Thread;
+ case NVPTX::Ordering::RelaxedMMIO:
+ // RelaxedMMIO operations are always system scope.
+ // If a RelaxedMMIO order was generated from an atomic volatile operation
+ // with a smaller thread scope, we bump it here to system scope.
+ return NVPTX::Scope::System;
+ case NVPTX::Ordering::Relaxed:
+ case NVPTX::Ordering::Acquire:
+ case NVPTX::Ordering::Release:
+ case NVPTX::Ordering::AcquireRelease:
+ case NVPTX::Ordering::SequentiallyConsistent:
+ auto S = Scopes[N->getSyncScopeID()];
+
+ // Atomic operations must have a scope greater than thread.
+ if (S == NVPTX::Scope::Thread)
+ report_fatal_error(
+ formatv("Atomics need scope > \"{}\".", ScopeToString(S)));
+
+ // If scope is cluster, clusters must be supported.
+ if (S == NVPTX::Scope::Cluster)
+ Subtarget->requireClusters("cluster scope");
+
+ // If operation is volatile, then its scope is system.
+ if (N->isVolatile())
+ S = NVPTX::Scope::System;
+
+ return S;
+ }
+}
+
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
@@ -957,33 +998,86 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
});
}
-NVPTX::Ordering NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL,
- SDValue &Chain,
- MemSDNode *N) {
+static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
+ NVPTXSubtarget const *T) {
+ if (S == NVPTX::Scope::Cluster)
+ T->requireClusters(".cluster scope fence");
+
+ switch (O) {
+ case NVPTX::Ordering::Acquire:
+ case NVPTX::Ordering::Release:
+ case NVPTX::Ordering::AcquireRelease: {
+ switch (S) {
+ case NVPTX::Scope::System:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_sys
+ : NVPTX::INT_MEMBAR_SYS;
+ case NVPTX::Scope::Block:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_cta
+ : NVPTX::INT_MEMBAR_CTA;
+ case NVPTX::Scope::Cluster:
+ return NVPTX::atomic_thread_fence_acq_rel_cluster;
+ case NVPTX::Scope::Device:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acq_rel_gpu
+ : NVPTX::INT_MEMBAR_GL;
+ case NVPTX::Scope::Thread:
+ report_fatal_error(
+ formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
+ ScopeToString(S)));
+ }
+ }
+ case NVPTX::Ordering::SequentiallyConsistent: {
+ switch (S) {
+ case NVPTX::Scope::System:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_sys
+ : NVPTX::INT_MEMBAR_SYS;
+ case NVPTX::Scope::Block:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_cta
+ : NVPTX::INT_MEMBAR_CTA;
+ case NVPTX::Scope::Cluster:
+ return NVPTX::atomic_thread_fence_seq_cst_cluster;
+ case NVPTX::Scope::Device:
+ return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_seq_cst_gpu
+ : NVPTX::INT_MEMBAR_GL;
+ case NVPTX::Scope::Thread:
+ report_fatal_error(formatv("Unsupported scope \"{}\" for seq_cst fence.",
+ ScopeToString(S)));
+ }
+ }
+ case NVPTX::Ordering::NotAtomic:
+ case NVPTX::Ordering::Relaxed:
+ case NVPTX::Ordering::Volatile:
+ case NVPTX::Ordering::RelaxedMMIO:
+ report_fatal_error(
+ formatv("Unsupported \"{}\" ordering and \"{}\" scope for fence.",
+ OrderingToString(O), ScopeToString(S)));
+ }
+}
+
+std::pair<NVPTX::Ordering, NVPTX::Scope>
+NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
+ MemSDNode *N) {
// Some memory instructions - loads, stores, atomics - need an extra fence
// instruction. Get the memory order of the instruction, and that of its
// fence, if any.
auto [InstructionOrdering, FenceOrdering] =
getOperationOrderings(N, Subtarget);
+ auto Scope = getOperationScope(N, InstructionOrdering);
// If a fence is required before the operation, insert it:
switch (NVPTX::Ordering(FenceOrdering)) {
case NVPTX::Ordering::NotAtomic:
break;
case NVPTX::Ordering::SequentiallyConsistent: {
- unsigned Op = Subtarget->hasMemoryOrdering()
- ? NVPTX::atomic_thread_fence_seq_cst_sys
- : NVPTX::INT_MEMBAR_SYS;
+ auto Op = getFenceOp(FenceOrdering, Scope, Subtarget);
Chain = SDValue(CurDAG->getMachineNode(Op, DL, MVT::Other, Chain), 0);
break;
}
default:
report_fatal_error(
formatv("Unexpected fence ordering: \"{}\".",
- OrderingToCString(NVPTX::Ordering(FenceOrdering))));
+ OrderingToString(NVPTX::Ordering(FenceOrdering))));
}
-
- return InstructionOrdering;
+ return std::make_pair(InstructionOrdering, Scope);
}
bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
@@ -1154,7 +1248,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, LD);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, LD);
// Type Setting: fromType + fromTypeWidth
//
@@ -1189,7 +1283,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
std::optional<unsigned> Opcode;
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
- SmallVector<SDValue, 12> Ops({getI32Imm(InstructionOrdering, DL),
+ SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL)});
@@ -1266,7 +1360,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, MemSD);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);
// Vector Setting
MVT SimpleVT = LoadedVT.getSimpleVT();
@@ -1319,7 +1413,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
std::optional<unsigned> Opcode;
SDNode *LD;
- SmallVector<SDValue, 12> Ops({getI32Imm(InstructionOrdering, DL),
+ SmallVector<SDValue, 12> Ops({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL)});
@@ -1895,7 +1989,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
SDLoc DL(N);
SDValue Chain = ST->getChain();
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, ST);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, ST);
// Vector Setting
MVT SimpleVT = StoreVT.getSimpleVT();
@@ -1923,10 +2017,10 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
MVT::SimpleValueType SourceVT =
Value.getNode()->getSimpleValueType(0).SimpleTy;
- SmallVector<SDValue, 12> Ops({Value, getI32Imm(InstructionOrdering, DL),
- getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(ToType, DL),
- getI32Imm(ToTypeWidth, DL)});
+ SmallVector<SDValue, 12> Ops(
+ {Value, getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
+ getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
+ getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
if (SelectDirectAddr(BasePtr, Addr)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
@@ -2005,7 +2099,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
- auto InstructionOrdering = insertMemoryInstructionFence(DL, Chain, MemSD);
+ auto [Ordering, Scope] = insertMemoryInstructionFence(DL, Chain, MemSD);
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
@@ -2044,9 +2138,9 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
ToTypeWidth = 32;
}
- Ops.append({getI32Imm(InstructionOrdering, DL), getI32Imm(CodeAddrSpace, DL),
- getI32Imm(VecType, DL), getI32Imm(ToType, DL),
- getI32Imm(ToTypeWidth, DL)});
+ Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
+ getI32Imm(CodeAddrSpace, DL), getI32Imm(VecType, DL),
+ getI32Imm(ToType, DL), getI32Imm(ToTypeWidth, DL)});
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
@@ -4064,3 +4158,43 @@ unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
}
}
}
+
+bool NVPTXDAGToDAGISel::tryFence(SDNode *N) {
+ SDLoc DL(N);
+ assert(N->getOpcode() == ISD::ATOMIC_FENCE);
+ unsigned int FenceOp =
+ getFenceOp(NVPTX::Ordering(N->getConstantOperandVal(1)),
+ Scopes[N->getConstantOperandVal(2)], Subtarget);
+ SDValue Chain = N->getOperand(0);
+ SDNode *FenceNode = CurDAG->getMachineNode(FenceOp, DL, MVT::Other, Chain);
+ ReplaceNode(N, FenceNode);
+ return true;
+}
+
+NVPTXScopes::NVPTXScopes(LLVMContext &C) : CTX(&C) {
+ Scopes[C.getOrInsertSyncScopeID("singlethread")] = NVPTX::Scope::Thread;
+ Scopes[C.getOrInsertSyncScopeID("")] = NVPTX::Scope::System;
+ Scopes[C.getOrInsertSyncScopeID("block")] = NVPTX::Scope::Block;
+ Scopes[C.getOrInsertSyncScopeID("cluster")] = NVPTX::Scope::Cluster;
+ Scopes[C.getOrInsertSyncScopeID("device")] = NVPTX::Scope::Device;
+}
+
+NVPTX::Scope NVPTXScopes::operator[](SyncScope::ID ID) const {
+ if (Scopes.empty())
+ report_fatal_error("NVPTX Scopes must be initialized before calling "
+ "NVPTXScopes::operator[]");
+
+ auto S = Scopes.find(ID);
+ if (S == Scopes.end()) {
+ SmallVector<StringRef, 8> ScopeNames;
+ assert(CTX != nullptr && "CTX is nullptr");
+ CTX->getSyncScopeNames(ScopeNames);
+ StringRef Unknown{"unknown"};
+ auto Name = ID < ScopeNames.size() ? ScopeNames[ID] : Unknown;
+ report_fatal_error(
+ formatv("Could not find scope ID={} with name \"{}\".", int(ID), Name));
+ }
+ return S->second;
+}
+
+bool NVPTXScopes::empty() const { return Scopes.size() == 0; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index eac4056599511c..7eccf9e45314b1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -18,13 +18,26 @@
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
+struct NVPTXScopes {
+ NVPTXScopes() = default;
+ NVPTXScopes(LLVMContext &C);
+ NVPTX::Scope operator[](SyncScope::ID ID) const;
+ bool empty() const;
+
+private:
+ SmallMapVector<SyncScope::ID, NVPTX::Scope, 8> Scopes{};
+ LLVMContext *CTX = nullptr;
+};
+
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
const NVPTXTargetMachine &TM;
@@ -38,6 +51,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool allowUnsafeFPMath() const;
bool doRsqrtOpt() const;
+ NVPTXScopes Scopes{};
+
public:
NVPTXDAGToDAGISel() = delete;
@@ -66,6 +81,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool tryLoadParam(SDNode *N);
bool tryStoreRetval(SDNode *N);
bool tryStoreParam(SDNode *N);
+ bool tryFence(SDNode *N);
void SelectAddrSpaceCast(SDNode *N);
bool tryTextureIntrinsic(SDNode *N);
bool trySurfaceIntrinsic(SDNode *N);
@@ -100,8 +116,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, LoadSDNode *N);
- NVPTX::Ordering insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
- MemSDNode *N);
+ std::pair<NVPTX::Ordering, NVPTX::Scope>
+ insertMemoryInstructionFence(SDLoc DL, SDValue &Chain, MemSDNode *N);
+ NVPTX::Scope getOperationScope(MemSDNode *N, NVPTX::Ordering O) const;
};
class NVPTXDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b57c86fcf697cd..85876197331976 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2955,39 +2955,39 @@ foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
multiclass LD<NVPTXRegClass regclass> {
def _avar : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _areg : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _areg_64 : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr];", []>;
def _ari : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
- "ld${sem:sem}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
+ "ld${sem:sem}${sco:sco}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
"\t$dst, [$addr+$offset];", []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
- (ins LdStCode:$sem, LdStCode:$addsp, LdStCode:$Vec,
+ (ins LdStCode:$sem, LdStCode:$sco, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
- "ld${sem:sem}${a...
[truncated]
``````````
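Two details of the scope selection above are easy to miss: non-atomic operations are treated as thread scope, and volatile atomic operations have their scope widened to system scope. A hypothetical IR sketch (not from the patch's tests) of the volatile case:

```llvm
; Hypothetical example: per getOperationScope(), a volatile atomic operation has
; its scope widened to system, so this load is expected to select a ".sys"-scoped
; instruction rather than a ".cta"-scoped one.
define i32 @volatile_scoped(ptr %a) {
  %v = load atomic volatile i32, ptr %a syncscope("block") acquire, align 4
  ret i32 %v
}
```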
https://github.com/llvm/llvm-project/pull/106101