[llvm] [NVPTX] miscellaneous minor cleanup (NFC) (PR #152329)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 09:01:22 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
---
Patch is 82.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152329.diff
4 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+68-87)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (-2)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+55-76)
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+253-487)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 6068035b2ee47..75e8635ec892f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1027,6 +1027,64 @@ pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i16,
}
}
+static inline bool isAddLike(const SDValue V) {
+ return V.getOpcode() == ISD::ADD ||
+ (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
+}
+
+// selectBaseADDR - Match a dag node which will serve as the base address for an
+// ADDR operand pair.
+static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
+ if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
+ return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
+ GA->getValueType(0), GA->getOffset(),
+ GA->getTargetFlags());
+ if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
+ return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+ ES->getTargetFlags());
+ if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
+ return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
+
+ return N;
+}
+
+static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
+ APInt AccumulatedOffset(64u, 0);
+ while (isAddLike(Addr)) {
+ const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (!CN)
+ break;
+
+ const APInt CI = CN->getAPIntValue().sext(64);
+ if (!(CI + AccumulatedOffset).isSignedIntN(32))
+ break;
+
+ AccumulatedOffset += CI;
+ Addr = Addr->getOperand(0);
+ }
+ return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
+ MVT::i32);
+}
+
+static std::pair<SDValue, SDValue> selectADDR(SDValue Addr, SelectionDAG *DAG) {
+ SDValue Offset = accumulateOffset(Addr, SDLoc(Addr), DAG);
+ SDValue Base = selectBaseADDR(Addr, DAG);
+ return {Base, Offset};
+}
+
+// Select a pair of operands which represent a valid PTX address, this could be
+// one of the following things:
+// - [var] - Offset is simply set to 0
+// - [reg] - Offset is simply set to 0
+// - [reg+immOff]
+// - [var+immOff]
+// Note that immOff must fit into a 32-bit signed integer.
+bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ std::tie(Base, Offset) = selectADDR(Addr, CurDAG);
+ return true;
+}
+
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
MemSDNode *LD = cast<MemSDNode>(N);
assert(LD->readMem() && "Expected load");
@@ -1062,8 +1120,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
FromTypeWidth <= 128 && "Invalid width for load");
// Create the machine instruction DAG
- SDValue Offset, Base;
- SelectADDR(N->getOperand(1), Base, Offset);
+ const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
SDValue Ops[] = {getI32Imm(Ordering, DL),
getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
@@ -1144,8 +1201,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
- SDValue Offset, Base;
- SelectADDR(N->getOperand(1), Base, Offset);
+ const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
SDValue Ops[] = {getI32Imm(Ordering, DL),
getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL),
@@ -1213,8 +1269,7 @@ bool NVPTXDAGToDAGISel::tryLDG(MemSDNode *LD) {
assert(isPowerOf2_32(FromTypeWidth) && FromTypeWidth >= 8 &&
FromTypeWidth <= 128 && TotalWidth <= 256 && "Invalid width for load");
- SDValue Base, Offset;
- SelectADDR(LD->getOperand(1), Base, Offset);
+ const auto [Base, Offset] = selectADDR(LD->getOperand(1), CurDAG);
SDValue Ops[] = {getI32Imm(FromType, DL), getI32Imm(FromTypeWidth, DL), Base,
Offset, LD->getChain()};
@@ -1278,8 +1333,7 @@ bool NVPTXDAGToDAGISel::tryLDU(SDNode *N) {
SDValue Addr =
LD->getOperand(LD->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 2 : 1);
- SDValue Base, Offset;
- SelectADDR(Addr, Base, Offset);
+ const auto [Base, Offset] = selectADDR(Addr, CurDAG);
SDValue Ops[] = {getI32Imm(FromTypeWidth, DL), Base, Offset, LD->getChain()};
std::optional<unsigned> Opcode;
@@ -1339,9 +1393,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
"Invalid width for store");
- SDValue Offset, Base;
- SelectADDR(ST->getBasePtr(), Base, Offset);
-
+ const auto [Base, Offset] = selectADDR(ST->getBasePtr(), CurDAG);
SDValue Ops[] = {selectPossiblyImm(Value),
getI32Imm(Ordering, DL),
getI32Imm(Scope, DL),
@@ -1399,9 +1451,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
assert(isPowerOf2_32(ToTypeWidth) && ToTypeWidth >= 8 && ToTypeWidth <= 128 &&
TotalWidth <= 256 && "Invalid width for store");
- SDValue Offset, Base;
- SelectADDR(Addr, Base, Offset);
-
+ const auto [Base, Offset] = selectADDR(Addr, CurDAG);
Ops.append({getI32Imm(Ordering, DL), getI32Imm(Scope, DL),
getI32Imm(CodeAddrSpace, DL), getI32Imm(ToTypeWidth, DL), Base,
Offset, Chain});
@@ -1708,58 +1758,6 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
return true;
}
-static inline bool isAddLike(const SDValue V) {
- return V.getOpcode() == ISD::ADD ||
- (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint());
-}
-
-// selectBaseADDR - Match a dag node which will serve as the base address for an
-// ADDR operand pair.
-static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
- if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
- return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
- GA->getValueType(0), GA->getOffset(),
- GA->getTargetFlags());
- if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
- return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
- ES->getTargetFlags());
- if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
- return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
-
- return N;
-}
-
-static SDValue accumulateOffset(SDValue &Addr, SDLoc DL, SelectionDAG *DAG) {
- APInt AccumulatedOffset(64u, 0);
- while (isAddLike(Addr)) {
- const auto *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (!CN)
- break;
-
- const APInt CI = CN->getAPIntValue().sext(64);
- if (!(CI + AccumulatedOffset).isSignedIntN(32))
- break;
-
- AccumulatedOffset += CI;
- Addr = Addr->getOperand(0);
- }
- return DAG->getSignedTargetConstant(AccumulatedOffset.getSExtValue(), DL,
- MVT::i32);
-}
-
-// Select a pair of operands which represent a valid PTX address, this could be
-// one of the following things:
-// - [var] - Offset is simply set to 0
-// - [reg] - Offset is simply set to 0
-// - [reg+immOff]
-// - [var+immOff]
-// Note that immOff must fit into a 32-bit signed integer.
-bool NVPTXDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- Offset = accumulateOffset(Addr, SDLoc(Addr), CurDAG);
- Base = selectBaseADDR(Addr, CurDAG);
- return true;
-}
SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
if (V.getOpcode() == ISD::BITCAST)
@@ -1774,37 +1772,20 @@ SDValue NVPTXDAGToDAGISel::selectPossiblyImm(SDValue V) {
return V;
}
-bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
- unsigned int spN) const {
- const Value *Src = nullptr;
- if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
- if (spN == 0 && mN->getMemOperand()->getPseudoValue())
- return true;
- Src = mN->getMemOperand()->getValue();
- }
- if (!Src)
- return false;
- if (auto *PT = dyn_cast<PointerType>(Src->getType()))
- return (PT->getAddressSpace() == spN);
- return false;
-}
-
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
switch (ConstraintID) {
default:
return true;
- case InlineAsm::ConstraintCode::m: // memory
- if (SelectADDR(Op, Op0, Op1)) {
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
+ case InlineAsm::ConstraintCode::m: { // memory
+ const auto [Base, Offset] = selectADDR(Op, CurDAG);
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
return false;
}
- break;
}
return true;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 9e0f88e544980..357e915fd077e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -102,8 +102,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
SDValue selectPossiblyImm(SDValue V);
- bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
-
// Returns the Memory Order and Scope that the PTX memory instruction should
// use, and inserts appropriate fence instruction before the memory
// instruction, if needed to implement the instructions memory order. Required
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 6765ecb77da3a..71ae5118125b5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -148,13 +148,16 @@ class OneUse2<SDPatternOperator operator>
: PatFrag<(ops node:$A, node:$B), (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>;
-class fpimm_pos_inf<ValueType vt>
- : FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
-
class zeroinitializer<ValueType vt> :
PatLeaf<(vt (bitconvert (!cast<ValueType>("i" # vt.Size) 0)))>;
+def fpimm_pos_inf : FPImmLeaf<fAny, [{ return Imm.isPosInfinity(); }]>;
+def fpimm_0 : FPImmLeaf<fAny, [{ return Imm.isZero(); }]>;
+def fpimm_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(1.0); }]>;
+def fpimm_neg_1 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(-1.0); }]>;
+
+
// Operands which can hold a Register or an Immediate.
//
// Unfortunately, since most register classes can hold multiple types, we must
@@ -268,7 +271,7 @@ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
foreach t = [I16RT, I32RT, I64RT] in
- defm t.Ty# : I3Inst<op_str # t.Size, op_node, t, commutative>;
+ defm t.Size# : I3Inst<op_str # t.Size, op_node, t, commutative>;
}
class I16x2<string OpcStr, SDNode OpNode> :
@@ -757,8 +760,8 @@ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>;
defm MULT : I3<"mul.lo.s", mul, commutative = true>;
-defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
-defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
+defm MUL_HI_S : I3<"mul.hi.s", mulhs, commutative = true>;
+defm MUL_HI_U : I3<"mul.hi.u", mulhu, commutative = true>;
defm SDIV : I3<"div.s", sdiv, commutative = false>;
defm UDIV : I3<"div.u", udiv, commutative = false>;
@@ -875,22 +878,6 @@ let Predicates = [hasOptEnabled] in {
// Floating Point Arithmetic
//-----------------------------------
-// Constant 1.0f
-def f32imm_1 : FPImmLeaf<f32, [{
- return &Imm.getSemantics() == &llvm::APFloat::IEEEsingle() &&
- Imm.convertToFloat() == 1.0f;
-}]>;
-// Constant 1.0 (double)
-def f64imm_1 : FPImmLeaf<f64, [{
- return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
- Imm.convertToDouble() == 1.0;
-}]>;
-// Constant -1.0 (double)
-def f64imm_neg1 : FPImmLeaf<f64, [{
- return &Imm.getSemantics() == &llvm::APFloat::IEEEdouble() &&
- Imm.convertToDouble() == -1.0;
-}]>;
-
defm FADD : F3_fma_component<"add", fadd>;
defm FSUB : F3_fma_component<"sub", fsub>;
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -950,7 +937,7 @@ def FRCP64r :
BasicNVPTXInst<(outs B64:$dst),
(ins B64:$b),
"rcp.rn.f64",
- [(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
+ [(set f64:$dst, (fdiv fpimm_1, f64:$b))]>;
def FDIV64rr :
BasicNVPTXInst<(outs B64:$dst),
(ins B64:$a, B64:$b),
@@ -964,7 +951,7 @@ def FDIV64ri :
// fdiv will be converted to rcp
// fneg (fdiv 1.0, X) => fneg (rcp.rn X)
-def : Pat<(fdiv f64imm_neg1, f64:$b),
+def : Pat<(fdiv fpimm_neg_1, f64:$b),
(FNEGf64 (FRCP64r $b))>;
//
@@ -977,21 +964,21 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
}]>;
-def FRCP32_approx_r :
+def RCP_APPROX_F32_r :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$b), (ins FTZFlag:$ftz),
"rcp.approx$ftz.f32",
- [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
+ [(set f32:$dst, (fdiv_approx fpimm_1, f32:$b))]>;
//
// F32 Approximate division
//
-def FDIV32_approx_rr :
+def DIV_APPROX_F32_rr :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b), (ins FTZFlag:$ftz),
"div.approx$ftz.f32",
[(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
-def FDIV32_approx_ri :
+def DIV_APPROX_F32_ri :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, f32imm:$b), (ins FTZFlag:$ftz),
"div.approx$ftz.f32",
@@ -1008,8 +995,8 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
}]>;
-def : Pat<(fdiv_full f32imm_1, f32:$b),
- (FRCP32_approx_r $b)>;
+def : Pat<(fdiv_full fpimm_1, f32:$b),
+ (RCP_APPROX_F32_r $b)>;
//
// F32 Semi-accurate division
@@ -1037,7 +1024,7 @@ def FRCP32r_prec :
BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$b), (ins FTZFlag:$ftz),
"rcp.rn$ftz.f32",
- [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>;
+ [(set f32:$dst, (fdiv_ftz fpimm_1, f32:$b))]>;
//
// F32 Accurate division
//
@@ -1052,7 +1039,7 @@ def FDIV32ri_prec :
"div.rn$ftz.f32",
[(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>;
-def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
+def : Pat<(fdiv fpimm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>;
def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>;
@@ -1475,9 +1462,9 @@ def MmaCode : Operand<i32> {
// Get pointer to local stack.
let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
- "mov.b32 \t$d, __local_depot$num;", []>;
+ "mov.b32 \t$d, __local_depot$num;">;
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num),
- "mov.b64 \t$d, __local_depot$num;", []>;
+ "mov.b64 \t$d, __local_depot$num;">;
}
@@ -1533,9 +1520,9 @@ def : Pat<(i64 externalsym:$dst), (MOV_B64_i (to_texternsym $dst))>;
//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
- "add.u32 \t$dst, ${addr:add};", []>;
+ "add.u32 \t$dst, ${addr:add};">;
def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
- "add.u64 \t$dst, ${addr:add};", []>;
+ "add.u64 \t$dst, ${addr:add};">;
def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1612,12 +1599,12 @@ foreach is_convergent = [0, 1] in {
NVPTXInst<(outs),
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params,
i32imm:$proto),
- "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
+ "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;">;
def CALL_UNI # convergent_suffix :
NVPTXInst<(outs),
(ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
- "call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
+ "call.uni${rets:RetList} $addr, (${params:ParamList});">;
}
defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
@@ -1633,10 +1620,10 @@ foreach is_convergent = [0, 1] in {
def DECLARE_PARAM_array :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$align, i32imm:$size),
- ".param .align $align .b8 \t$a[$size];", []>;
+ ".param .align $align .b8 \t$a[$size];">;
def DECLARE_PARAM_scalar :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
- ".param .b$size \t$a;", []>;
+ ".param .b$size \t$a;">;
def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
(DECLARE_PARAM_array (to_texternsym $a), imm:$align, imm:$size)>;
@@ -1709,7 +1696,7 @@ class LD<NVPTXRegClass regclass>
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, AtomicCode:$Sign,
i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.${Sign:sign}$fromWidth "
- "\t$dst, [$addr];", []>;
+ "\t$dst, [$addr];">;
let mayLoad=1, hasSideEffects=0 in {
def LD_i16 : LD<B16>;
@@ -1724,7 +1711,7 @@ class ST<DAGOperand O>
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$toWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.b$toWidth"
- " \t[$addr], $src;", []>;
+ " \t[$addr], $src;">;
let mayStore=1, hasSideEffects=0 in {
def ST_i16 : ST<RI16>;
@@ -1741,13 +1728,13 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v2.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2}}, [$addr];", []>;
+ "\t{{$dst1, $dst2}}, [$addr];">;
def _v4 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp,
AtomicCode:$Sign, i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v4.${Sign:sign}$fromWidth "
- "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>;
+ "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];">;
if support_v8 then
def _v8 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
@@ -1756,7 +1743,7 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
i32imm:$fromWidth, ADDR:$addr),
"ld${sem:sem}${scope:scope}${addsp:addsp}.v8.${Sign:sign}$fromWidth "
"\t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, "
- "[$addr];", []>;
+ "[$addr];">;
}
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i16 : LD_VEC<B16>;
@@ -1771,14 +1758,14 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v2.b$fromWidth "
- "\t[$addr], {{$src1, $src2}};", []>;
+ "\t[$addr], {{$src1, $src2}};">;
def _v4 : NVPTXInst<
(outs),
(ins O:$src1, O:$src2, O:$src3, O:$src4,
AtomicCode:$sem, AtomicCode:$scope, AtomicCode:$addsp, i32imm:$fromWidth,
ADDR:$addr),
"st${sem:sem}${scope:scope}${addsp:addsp}.v4.b$fromWidth "
- "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>;
+ "\t[$addr], {{$src1, $src2, $src3, $src4}};">;
if support_v8 then
def _v8 : NVPTXInst<
(outs),
@@ -1788,7 +1775,7 @@ multiclass ST_VEC<DAGOperand O, bit support_v8 = false> {
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/152329
More information about the llvm-commits
mailing list