[clang] [llvm] [NVPTX] Consolidate and cleanup various NVPTXISD nodes (NFC) (PR #145581)
Alex MacLean via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 24 19:45:54 PDT 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/145581
>From c8cc587ee44ba499cda0fc6712ca33ba22dc7c77 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Mon, 23 Jun 2025 18:23:24 +0000
Subject: [PATCH 1/3] [NVPTX] Remove various dead/extraneous NVPTXISD nodes
(NFC)
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 59 ++--------
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 32 +-----
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 13 ---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 113 ++++++--------------
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 2 +
5 files changed, 44 insertions(+), 175 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ff10eea371049..af9050c55d33a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -160,8 +160,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::StoreParam:
case NVPTXISD::StoreParamV2:
case NVPTXISD::StoreParamV4:
- case NVPTXISD::StoreParamS32:
- case NVPTXISD::StoreParamU32:
if (tryStoreParam(N))
return;
break;
@@ -909,19 +907,9 @@ bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
switch (IID) {
default:
return false;
- case Intrinsic::nvvm_texsurf_handle_internal:
- SelectTexSurfHandle(N);
- return true;
}
}
-void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
- // Op 0 is the intrinsic ID
- SDValue Wrapper = N->getOperand(1);
- SDValue GlobalVal = Wrapper.getOperand(0);
- ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
- MVT::i64, GlobalVal));
-}
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
SDValue Src = N->getOperand(0);
@@ -1717,8 +1705,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
switch (N->getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
- case NVPTXISD::StoreParamU32:
- case NVPTXISD::StoreParamS32:
case NVPTXISD::StoreParam:
NumElts = 1;
break;
@@ -1796,27 +1782,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
}
}
break;
- // Special case: if we have a sign-extend/zero-extend node, insert the
- // conversion instruction first, and use that as the value operand to
- // the selected StoreParam node.
- case NVPTXISD::StoreParamU32: {
- Opcode = NVPTX::StoreParamI32_r;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
- MVT::i32);
- SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
- MVT::i32, Ops[0], CvtNone);
- Ops[0] = SDValue(Cvt, 0);
- break;
- }
- case NVPTXISD::StoreParamS32: {
- Opcode = NVPTX::StoreParamI32_r;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
- MVT::i32);
- SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
- MVT::i32, Ops[0], CvtNone);
- Ops[0] = SDValue(Cvt, 0);
- break;
- }
}
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
@@ -2105,22 +2070,14 @@ static inline bool isAddLike(const SDValue V) {
// selectBaseADDR - Match a dag node which will serve as the base address for an
// ADDR operand pair.
static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
- // Return true if TGA or ES.
- if (N.getOpcode() == ISD::TargetGlobalAddress ||
- N.getOpcode() == ISD::TargetExternalSymbol)
- return N;
-
- if (N.getOpcode() == NVPTXISD::Wrapper)
- return N.getOperand(0);
-
- // addrspacecast(Wrapper(arg_symbol) to addrspace(PARAM)) -> arg_symbol
- if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N))
- if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
- CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
- CastN->getOperand(0).getOpcode() == NVPTXISD::Wrapper)
- return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG);
-
- if (auto *FIN = dyn_cast<FrameIndexSDNode>(N))
+ if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
+ return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
+ GA->getValueType(0), GA->getOffset(),
+ GA->getTargetFlags());
+ if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
+ return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+ ES->getTargetFlags());
+ if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
return N;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d2fafe854e9e4..12d1cc17250ae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -702,9 +702,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
-
// We want to legalize constant related memmove and memcopy
// intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -1055,14 +1052,10 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::FIRST_NUMBER:
break;
- MAKE_CASE(NVPTXISD::CALL)
MAKE_CASE(NVPTXISD::RET_GLUE)
- MAKE_CASE(NVPTXISD::LOAD_PARAM)
- MAKE_CASE(NVPTXISD::Wrapper)
MAKE_CASE(NVPTXISD::DeclareParam)
MAKE_CASE(NVPTXISD::DeclareScalarParam)
MAKE_CASE(NVPTXISD::DeclareRet)
- MAKE_CASE(NVPTXISD::DeclareScalarRet)
MAKE_CASE(NVPTXISD::DeclareRetParam)
MAKE_CASE(NVPTXISD::PrintCall)
MAKE_CASE(NVPTXISD::PrintConvergentCall)
@@ -1074,24 +1067,18 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::StoreParam)
MAKE_CASE(NVPTXISD::StoreParamV2)
MAKE_CASE(NVPTXISD::StoreParamV4)
- MAKE_CASE(NVPTXISD::StoreParamS32)
- MAKE_CASE(NVPTXISD::StoreParamU32)
MAKE_CASE(NVPTXISD::CallArgBegin)
MAKE_CASE(NVPTXISD::CallArg)
MAKE_CASE(NVPTXISD::LastCallArg)
MAKE_CASE(NVPTXISD::CallArgEnd)
MAKE_CASE(NVPTXISD::CallVoid)
- MAKE_CASE(NVPTXISD::CallVal)
- MAKE_CASE(NVPTXISD::CallSymbol)
MAKE_CASE(NVPTXISD::Prototype)
MAKE_CASE(NVPTXISD::MoveParam)
MAKE_CASE(NVPTXISD::StoreRetval)
MAKE_CASE(NVPTXISD::StoreRetvalV2)
MAKE_CASE(NVPTXISD::StoreRetvalV4)
- MAKE_CASE(NVPTXISD::PseudoUseParam)
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
- MAKE_CASE(NVPTXISD::RETURN)
MAKE_CASE(NVPTXISD::CallSeqBegin)
MAKE_CASE(NVPTXISD::CallSeqEnd)
MAKE_CASE(NVPTXISD::CallPrototype)
@@ -1115,7 +1102,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::STACKSAVE)
MAKE_CASE(NVPTXISD::SETP_F16X2)
MAKE_CASE(NVPTXISD::SETP_BF16X2)
- MAKE_CASE(NVPTXISD::Dummy)
MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
MAKE_CASE(NVPTXISD::BrxEnd)
@@ -1189,15 +1175,6 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
}
}
-SDValue
-NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
- const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
- auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
- Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
- return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
-}
-
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign RetAlign,
@@ -2919,8 +2896,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
case ISD::ADDRSPACECAST:
return LowerADDRSPACECAST(Op, DAG);
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return Op;
case ISD::INTRINSIC_WO_CHAIN:
@@ -3129,8 +3104,7 @@ SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
// Store the address of unsized array <function>_vararg[] in the ap object.
- SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
- SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
+ SDValue VAReg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
@@ -3370,7 +3344,7 @@ SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
EVT v) const {
StringRef SavedStr = nvTM->getStrPool().save(
getParamName(&DAG.getMachineFunction().getFunction(), idx));
- return DAG.getTargetExternalSymbol(SavedStr.data(), v);
+ return DAG.getExternalSymbol(SavedStr.data(), v);
}
SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -3438,7 +3412,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue P;
if (isKernelFunction(*F)) {
- P = DAG.getNode(NVPTXISD::Wrapper, dl, ByvalIn.VT, ArgSymbol);
+ P = ArgSymbol;
P.getNode()->setIROrder(Arg.getArgNo() + 1);
} else {
P = DAG.getNode(NVPTXISD::MoveParam, dl, ByvalIn.VT, ArgSymbol);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 0a54a8fd71f32..984e95c1b2e79 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -24,15 +24,11 @@ namespace NVPTXISD {
enum NodeType : unsigned {
// Start the numbering from where ISD NodeType finishes.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- Wrapper,
- CALL,
RET_GLUE,
- LOAD_PARAM,
DeclareParam,
DeclareScalarParam,
DeclareRetParam,
DeclareRet,
- DeclareScalarRet,
PrintCall,
PrintConvergentCall,
PrintCallUni,
@@ -42,12 +38,8 @@ enum NodeType : unsigned {
LastCallArg,
CallArgEnd,
CallVoid,
- CallVal,
- CallSymbol,
Prototype,
MoveParam,
- PseudoUseParam,
- RETURN,
CallSeqBegin,
CallSeqEnd,
CallPrototype,
@@ -83,7 +75,6 @@ enum NodeType : unsigned {
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
- Dummy,
FIRST_MEMORY_OPCODE,
LoadV2 = FIRST_MEMORY_OPCODE,
@@ -100,8 +91,6 @@ enum NodeType : unsigned {
StoreParam,
StoreParamV2,
StoreParamV4,
- StoreParamS32, // to sext and store a <32bit value, not used currently
- StoreParamU32, // to zext and store a <32bit value, not used currently
StoreRetval,
StoreRetvalV2,
StoreRetvalV4,
@@ -120,8 +109,6 @@ class NVPTXTargetLowering : public TargetLowering {
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
-
const char *getTargetNodeName(unsigned Opcode) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 5979054764647..ea8a3f955645f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1700,17 +1700,6 @@ def Offseti32imm : Operand<i32> {
let PrintMethod = "printOffseti32imm";
}
-def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
-def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
-
-// Load a memory address into a u32 or u64 register.
-def MOV_ADDR : BasicNVPTXInst<(outs B32:$dst), (ins ADDR_base:$a),
- "mov.b32",
- [(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
-def MOV_ADDR64 : BasicNVPTXInst<(outs B64:$dst), (ins ADDR_base:$a),
- "mov.b64",
- [(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
-
// Get pointer to local stack.
let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
@@ -1750,8 +1739,24 @@ def BFMOV16i : MOVi<B16, "b16", bf16, bf16imm, fpimm>;
def FMOV32i : MOVi<B32, "b32", f32, f32imm, fpimm>;
def FMOV64i : MOVi<B64, "b64", f64, f64imm, fpimm>;
-def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>;
-def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;
+
+def to_tglobaladdr : SDNodeXForm<globaladdr, [{
+ return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N), N->getValueType(0), N->getOffset(), N->getTargetFlags());
+}]>;
+
+def to_texternsym : SDNodeXForm<externalsym, [{
+ return CurDAG->getTargetExternalSymbol(N->getSymbol(), N->getValueType(0), N->getTargetFlags());
+}]>;
+
+def to_tframeindex : SDNodeXForm<frameindex, [{
+ return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
+}]>;
+
+def : Pat<(i32 globaladdr:$dst), (IMOV32i (to_tglobaladdr $dst))>;
+def : Pat<(i64 globaladdr:$dst), (IMOV64i (to_tglobaladdr $dst))>;
+
+def : Pat<(i32 externalsym:$dst), (IMOV32i (to_texternsym $dst))>;
+def : Pat<(i64 externalsym:$dst), (IMOV64i (to_texternsym $dst))>;
//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
@@ -1759,10 +1764,6 @@ def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
"add.u64 \t$dst, ${addr:add};", []>;
-def to_tframeindex : SDNodeXForm<frameindex, [{
- return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
-}]>;
-
def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
@@ -1990,11 +1991,10 @@ def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]
def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
def SDTCallValProfile : SDTypeProfile<1, 0, []>;
-def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
+def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
-def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
def DeclareParam :
@@ -2039,12 +2039,6 @@ def StoreParamV2 :
def StoreParamV4 :
SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamU32 :
- SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamS32 :
- SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgBegin :
SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2063,9 +2057,6 @@ def CallVoid :
def Prototype :
SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallVal :
- SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def StoreRetval :
@@ -2077,12 +2068,6 @@ def StoreRetvalV2 :
def StoreRetvalV4 :
SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
[SDNPHasChain, SDNPSideEffect]>;
-def PseudoUseParam :
- SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def RETURNNode :
- SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
- [SDNPHasChain, SDNPSideEffect]>;
def ProxyReg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2247,19 +2232,18 @@ def StoreRetvalV4I8 : StoreRetvalV4Inst<B16, ".b8">;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
-def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
[(CallArg 1, imm:$a)]>;
def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
[(LastCallArg 1, imm:$a)]>;
-def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ",
- [(CallVoid (Wrapper tglobaladdr:$addr))]>;
-def CallVoidInstReg : NVPTXInst<(outs), (ins B32:$addr), "$addr, ",
- [(CallVoid i32:$addr)]>;
-def CallVoidInstReg64 : NVPTXInst<(outs), (ins B64:$addr), "$addr, ",
- [(CallVoid i64:$addr)]>;
+def CALL_ADDR : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", []>;
+
+def : Pat<(CallVoid globaladdr:$addr), (CALL_ADDR (to_tglobaladdr $addr))>;
+def : Pat<(CallVoid i32:$addr), (CALL_ADDR $addr)>;
+def : Pat<(CallVoid i64:$addr), (CALL_ADDR $addr)>;
+
def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
[(Prototype (i32 imm:$val))]>;
@@ -2271,10 +2255,6 @@ def DeclareRetScalarInst :
NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
".param .b$size retval$num;",
[(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
-def DeclareRetRegInst :
- NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
- ".reg .b$size retval$num;",
- [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareParamInst :
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
@@ -2284,29 +2264,13 @@ def DeclareScalarParamInst :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
".param .b$size param$a;",
[(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
-def DeclareScalarRegInst :
- NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
- ".reg .b$size param$a;",
- [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
-
-class MoveParamSymbolInst<RegTyInfo t> :
- BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src),
- "mov.b" # t.Size,
- [(set t.Ty:$dst, (MoveParam texternalsym:$src))]>;
-
-def MOV64_PARAM : MoveParamSymbolInst<I64RT>;
-def MOV32_PARAM : MoveParamSymbolInst<I32RT>;
-class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt> :
- NVPTXInst<(outs), (ins regclass:$src),
- "// Pseudo use of $src",
- [(PseudoUseParam vt:$src)]>;
-
-def PseudoUseParamI64 : PseudoUseParamInst<B64, i64>;
-def PseudoUseParamI32 : PseudoUseParamInst<B32, i32>;
-def PseudoUseParamI16 : PseudoUseParamInst<B16, i16>;
-def PseudoUseParamF64 : PseudoUseParamInst<B64, f64>;
-def PseudoUseParamF32 : PseudoUseParamInst<B32, f32>;
+foreach t = [I32RT, I64RT] in {
+ defvar inst_name = "MOV" # t.Size # "_PARAM";
+ def inst_name : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), "mov.b" # t.Size>;
+ def : Pat<(MoveParam (t.Ty externalsym:$src)),
+ (!cast<NVPTXInst>(inst_name) (t.Ty (to_texternsym $src)))>;
+}
multiclass ProxyRegInst<string SzStr, NVPTXRegClass rc> {
def NAME : BasicNVPTXInst<(outs rc:$dst), (ins rc:$src),
@@ -2861,21 +2825,6 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPSideEffect]>;
-def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def calltarget : Operand<i32>;
-let isCall=1 in {
- def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;
-}
-
-def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
-def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
-
-// Pseudo instructions.
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : NVPTXInst<outs, ins, asmstr, pattern>;
-
def Callseq_Start :
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
"\\{ // callseq $amt1, $amt2",
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 10d7f04d8d937..cc1fd027d8515 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2224,6 +2224,8 @@ def nvvm_move_sym64 : NVPTXInst<(outs B64:$r), (ins ADDR_base:$s),
def texsurf_handles
: BasicNVPTXInst<(outs B64:$result), (ins ADDR_base:$src), "mov.u64">;
+def : Pat<(int_nvvm_texsurf_handle_internal globaladdr:$src),
+ (texsurf_handles (to_tglobaladdr $src))>;
//-----------------------------------
// Compiler Error Warn
>From aad4cadf4610d9d2fdc9b341e65a54c0e5677415 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 24 Jun 2025 16:59:08 +0000
Subject: [PATCH 2/3] more cleanup
---
clang/test/CodeGenCUDA/bf16.cu | 6 +-
.../NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp | 22 +
.../NVPTX/MCTargetDesc/NVPTXInstPrinter.h | 2 +
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1 -
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 89 +---
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 19 +-
llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 20 +-
llvm/lib/Target/NVPTX/NVPTXInstrInfo.h | 3 -
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 139 ++---
llvm/test/CodeGen/NVPTX/alias.ll | 3 +-
.../test/CodeGen/NVPTX/bf16x2-instructions.ll | 7 +-
llvm/test/CodeGen/NVPTX/byval-const-global.ll | 6 +-
.../CodeGen/NVPTX/call-with-alloca-buffer.ll | 3 +-
llvm/test/CodeGen/NVPTX/combine-mad.ll | 7 +-
.../test/CodeGen/NVPTX/convergent-mir-call.ll | 10 +-
.../CodeGen/NVPTX/convert-call-to-indirect.ll | 43 +-
llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll | 12 +-
llvm/test/CodeGen/NVPTX/f16-instructions.ll | 24 +-
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 21 +-
llvm/test/CodeGen/NVPTX/fma.ll | 14 +-
llvm/test/CodeGen/NVPTX/forward-ld-param.ll | 12 +-
llvm/test/CodeGen/NVPTX/fp128-storage-type.ll | 6 +-
llvm/test/CodeGen/NVPTX/i16x2-instructions.ll | 21 +-
llvm/test/CodeGen/NVPTX/i8x4-instructions.ll | 21 +-
llvm/test/CodeGen/NVPTX/indirect_byval.ll | 16 +-
llvm/test/CodeGen/NVPTX/ldparam-v4.ll | 5 +-
llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 36 +-
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 36 +-
llvm/test/CodeGen/NVPTX/lower-args.ll | 14 +-
llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 24 +-
llvm/test/CodeGen/NVPTX/misched_func_call.ll | 6 +-
.../NVPTX/naked-fn-with-frame-pointer.ll | 20 +-
llvm/test/CodeGen/NVPTX/param-add.ll | 6 +-
llvm/test/CodeGen/NVPTX/param-load-store.ll | 168 ++----
llvm/test/CodeGen/NVPTX/param-overalign.ll | 89 ++--
.../CodeGen/NVPTX/param-vectorize-device.ll | 72 +--
llvm/test/CodeGen/NVPTX/shift-opt.ll | 12 +-
llvm/test/CodeGen/NVPTX/st-param-imm.ll | 504 +++---------------
llvm/test/CodeGen/NVPTX/store-undef.ll | 12 +-
llvm/test/CodeGen/NVPTX/tex-read-cuda.ll | 6 +-
.../NVPTX/unaligned-param-load-store.ll | 42 +-
llvm/test/CodeGen/NVPTX/unreachable.ll | 20 +-
llvm/test/CodeGen/NVPTX/variadics-backend.ll | 28 +-
.../Inputs/nvptx-basic.ll.expected | 6 +-
44 files changed, 387 insertions(+), 1246 deletions(-)
diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu
index df56ec60c63ae..12474381e718b 100644
--- a/clang/test/CodeGenCUDA/bf16.cu
+++ b/clang/test/CodeGenCUDA/bf16.cu
@@ -37,11 +37,7 @@ __device__ __bf16 test_call( __bf16 in) {
// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
// CHECK: st.param.b16 [param0], %[[R]];
// CHECK: .param .align 2 .b8 retval0[2];
-// CHECK: call.uni (retval0),
-// CHECK-NEXT: _Z13external_funcDF16b,
-// CHECK-NEXT: (
-// CHECK-NEXT: param0
-// CHECK-NEXT );
+// CHECK: call.uni (retval0), _Z13external_funcDF16b, (param0);
// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0];
return external_func(in);
// CHECK: st.param.b16 [func_retval0], %[[RET]]
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index cc79257fb9c86..28f6968ee6caf 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -457,3 +457,25 @@ void NVPTXInstPrinter::printCTAGroup(const MCInst *MI, int OpNum,
}
llvm_unreachable("Invalid cta_group in printCTAGroup");
}
+
+void NVPTXInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O, StringRef Modifier) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImm() && "Invalid operand");
+ const auto Imm = MO.getImm();
+
+ if (Modifier == "RetList") {
+ assert((Imm == 1 || Imm == 0) && "Invalid return list");
+ if (Imm)
+ O << " (retval0),";
+ return;
+ }
+
+ if (Modifier == "ParamList") {
+ assert(Imm >= 0 && "Invalid parameter list");
+ interleaveComma(llvm::seq(Imm), O,
+ [&](const auto &I) { O << "param" << I; });
+ return;
+ }
+ llvm_unreachable("Invalid modifier");
+}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index f73af7a3f2c6e..6189284e8a58c 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -52,6 +52,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O);
void printTmaReductionMode(const MCInst *MI, int OpNum, raw_ostream &O);
void printCTAGroup(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O,
+ StringRef Modifier = {});
};
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index af9050c55d33a..849274f3678ca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -910,7 +910,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
}
}
-
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
SDValue Src = N->getOperand(0);
AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 12d1cc17250ae..b924a1f5ac93c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1057,30 +1057,19 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::DeclareScalarParam)
MAKE_CASE(NVPTXISD::DeclareRet)
MAKE_CASE(NVPTXISD::DeclareRetParam)
- MAKE_CASE(NVPTXISD::PrintCall)
- MAKE_CASE(NVPTXISD::PrintConvergentCall)
- MAKE_CASE(NVPTXISD::PrintCallUni)
- MAKE_CASE(NVPTXISD::PrintConvergentCallUni)
+ MAKE_CASE(NVPTXISD::CALL)
MAKE_CASE(NVPTXISD::LoadParam)
MAKE_CASE(NVPTXISD::LoadParamV2)
MAKE_CASE(NVPTXISD::LoadParamV4)
MAKE_CASE(NVPTXISD::StoreParam)
MAKE_CASE(NVPTXISD::StoreParamV2)
MAKE_CASE(NVPTXISD::StoreParamV4)
- MAKE_CASE(NVPTXISD::CallArgBegin)
- MAKE_CASE(NVPTXISD::CallArg)
- MAKE_CASE(NVPTXISD::LastCallArg)
- MAKE_CASE(NVPTXISD::CallArgEnd)
- MAKE_CASE(NVPTXISD::CallVoid)
- MAKE_CASE(NVPTXISD::Prototype)
MAKE_CASE(NVPTXISD::MoveParam)
MAKE_CASE(NVPTXISD::StoreRetval)
MAKE_CASE(NVPTXISD::StoreRetvalV2)
MAKE_CASE(NVPTXISD::StoreRetvalV4)
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
- MAKE_CASE(NVPTXISD::CallSeqBegin)
- MAKE_CASE(NVPTXISD::CallSeqEnd)
MAKE_CASE(NVPTXISD::CallPrototype)
MAKE_CASE(NVPTXISD::ProxyReg)
MAKE_CASE(NVPTXISD::LoadV2)
@@ -1578,9 +1567,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
? promoteScalarArgumentSize(TypeSize * 8)
: TypeSize * 8;
- Chain = DAG.getNode(
- NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
- {Chain, GetI32(ArgI), GetI32(PromotedSize), GetI32(0), InGlue});
+ Chain =
+ DAG.getNode(NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
+ {Chain, GetI32(ArgI), GetI32(PromotedSize), InGlue});
}
InGlue = Chain.getValue(1);
@@ -1717,16 +1706,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy);
if (!shouldPassAsArray(RetTy)) {
const unsigned PromotedResultSize = promoteScalarArgumentSize(ResultSize);
- SDValue DeclareRetOps[] = {Chain, GetI32(1), GetI32(PromotedResultSize),
- GetI32(0), InGlue};
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, {MVT::Other, MVT::Glue},
- DeclareRetOps);
+ {Chain, GetI32(PromotedResultSize), InGlue});
InGlue = Chain.getValue(1);
} else {
- SDValue DeclareRetOps[] = {Chain, GetI32(RetAlign->value()),
- GetI32(ResultSize / 8), GetI32(0), InGlue};
- Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl,
- {MVT::Other, MVT::Glue}, DeclareRetOps);
+ Chain = DAG.getNode(
+ NVPTXISD::DeclareRetParam, dl, {MVT::Other, MVT::Glue},
+ {Chain, GetI32(RetAlign->value()), GetI32(ResultSize / 8), InGlue});
InGlue = Chain.getValue(1);
}
}
@@ -1777,25 +1763,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
HasVAArgs ? std::optional(FirstVAArg) : std::nullopt, *CB,
UniqueCallSite);
const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
- SDValue ProtoOps[] = {
- Chain,
- DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
- InGlue,
- };
- Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
- ProtoOps);
+ Chain = DAG.getNode(
+ NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
+ {Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InGlue});
InGlue = Chain.getValue(1);
}
- // Op to just print "call"
- SDValue PrintCallOps[] = {Chain, GetI32(Ins.empty() ? 0 : 1), InGlue};
- // We model convergent calls as separate opcodes.
- unsigned Opcode =
- IsIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
- if (CLI.IsConvergent)
- Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
- : NVPTXISD::PrintConvergentCall;
- Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, PrintCallOps);
- InGlue = Chain.getValue(1);
if (ConvertToIndirectCall) {
// Copy the function ptr to a ptx register and use the register to call the
@@ -1809,38 +1781,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT);
}
- // Ops to print out the function name
- SDValue CallVoidOps[] = { Chain, Callee, InGlue };
- Chain =
- DAG.getNode(NVPTXISD::CallVoid, dl, {MVT::Other, MVT::Glue}, CallVoidOps);
- InGlue = Chain.getValue(1);
-
- // Ops to print out the param list
- SDValue CallArgBeginOps[] = { Chain, InGlue };
- Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, {MVT::Other, MVT::Glue},
- CallArgBeginOps);
+ const unsigned Proto = IsIndirectCall ? UniqueCallSite : 0;
+ const unsigned NumArgs =
+ std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
+ /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+ /// NumParams, Callee, Proto, InGlue)
+ Chain = DAG.getNode(NVPTXISD::CALL, dl, {MVT::Other, MVT::Glue},
+ {Chain, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall),
+ GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee,
+ GetI32(Proto), InGlue});
InGlue = Chain.getValue(1);
- const unsigned E = std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
- for (const unsigned I : llvm::seq(E)) {
- const unsigned Opcode =
- I == (E - 1) ? NVPTXISD::LastCallArg : NVPTXISD::CallArg;
- SDValue CallArgOps[] = {Chain, GetI32(1), GetI32(I), InGlue};
- Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, CallArgOps);
- InGlue = Chain.getValue(1);
- }
- SDValue CallArgEndOps[] = {Chain, GetI32(IsIndirectCall ? 0 : 1), InGlue};
- Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, {MVT::Other, MVT::Glue},
- CallArgEndOps);
- InGlue = Chain.getValue(1);
-
- if (IsIndirectCall) {
- SDValue PrototypeOps[] = {Chain, GetI32(UniqueCallSite), InGlue};
- Chain = DAG.getNode(NVPTXISD::Prototype, dl, {MVT::Other, MVT::Glue},
- PrototypeOps);
- InGlue = Chain.getValue(1);
- }
-
SmallVector<SDValue, 16> ProxyRegOps;
// An item of the vector is filled if the element does not need a ProxyReg
// operation on it and should be added to InVals as is. ProxyRegOps and
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 984e95c1b2e79..5efdd1582214a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -29,19 +29,14 @@ enum NodeType : unsigned {
DeclareScalarParam,
DeclareRetParam,
DeclareRet,
- PrintCall,
- PrintConvergentCall,
- PrintCallUni,
- PrintConvergentCallUni,
- CallArgBegin,
- CallArg,
- LastCallArg,
- CallArgEnd,
- CallVoid,
- Prototype,
+
+ /// This node represents a PTX call instruction. It's operands are as follows:
+ ///
+ /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+ /// NumParams, Callee, Proto, InGlue)
+ CALL,
+
MoveParam,
- CallSeqBegin,
- CallSeqEnd,
CallPrototype,
ProxyReg,
FSHL_CLAMP,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index bf84d1dca4ed5..e218ef17bb09b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -190,22 +190,4 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
-}
-
-bool NVPTXInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const {
- // Prevent the scheduler from reordering & splitting up MachineInstrs
- // which must stick together (in initially set order) to
- // comprise a valid PTX function call sequence.
- switch (MI.getOpcode()) {
- case NVPTX::CallUniPrintCallRetInst1:
- case NVPTX::CallArgBeginInst:
- case NVPTX::CallArgParam:
- case NVPTX::LastCallArgParam:
- case NVPTX::CallArgEndInst1:
- return true;
- }
-
- return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
-}
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 95464dbbd176d..4e9dc9d3b4686 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -66,9 +66,6 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
- bool isSchedulingBoundary(const MachineInstr &MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index ea8a3f955645f..bc506dd8a7114 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1976,21 +1976,15 @@ defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
def SDTDeclareParamProfile :
- SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
+ SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def SDTDeclareScalarParamProfile :
- SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
+ SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
-def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
-def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
-def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
-def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
-def SDTCallValProfile : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
@@ -2004,10 +1998,12 @@ def DeclareScalarParam :
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam :
- SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
+ SDNode<"NVPTXISD::DeclareRetParam",
+ SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet :
- SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
+ SDNode<"NVPTXISD::DeclareRet",
+ SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LoadParam :
SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
@@ -2018,18 +2014,6 @@ def LoadParamV2 :
def LoadParamV4 :
SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def PrintCall :
- SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def PrintConvergentCall :
- SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def PrintCallUni :
- SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def PrintConvergentCallUni :
- SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParam :
SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2039,24 +2023,6 @@ def StoreParamV2 :
def StoreParamV4 :
SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArgBegin :
- SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArg :
- SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def LastCallArg :
- SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArgEnd :
- SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallVoid :
- SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def Prototype :
- SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def StoreRetval :
@@ -2072,6 +2038,15 @@ def ProxyReg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+ /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+ /// NumParams, Callee, Proto, InGlue)
+def SDTCallProfile : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<5, i32>]>;
+def call :
+ SDNode<"NVPTXISD::CALL", SDTCallProfile,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+
let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
@@ -2092,11 +2067,6 @@ let mayLoad = true in {
[]>;
}
-class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
- NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat("mov", opstr, " \t$dst, retval$b;"),
- [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
-
let mayStore = true in {
multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
@@ -2159,23 +2129,40 @@ let mayStore = true in {
[]>;
}
-let isCall=1 in {
- multiclass CALL<string OpcStr, SDNode OpNode> {
- def PrintCallNoRetInst : NVPTXInst<(outs), (ins),
- OpcStr # " ", [(OpNode 0)]>;
- def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
- OpcStr # " (retval0), ", [(OpNode 1)]>;
+/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+/// NumParams, Callee, Proto, InGlue)
+
+def CallOperand : Operand<i32> { let PrintMethod = "printCallOperand"; }
+
+foreach is_convergent = [0, 1] in {
+ defvar convergent_suffix = !if(is_convergent, "_conv", "");
+
+ let isCall = 1, isConvergent = is_convergent in {
+ def CALL # convergent_suffix :
+ NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, i32imm:$proto),
+ "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
+
+ def CALL_UNI # convergent_suffix :
+ NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
+ "call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
}
-}
-defm Call : CALL<"call", PrintCall>;
-defm CallUni : CALL<"call.uni", PrintCallUni>;
+ defvar inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
+ defvar inst_uni = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
-// Convergent call instructions. These are identical to regular calls, except
-// they have the isConvergent bit set.
-let isConvergent=1 in {
- defm ConvergentCall : CALL<"call", PrintConvergentCall>;
- defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>;
+ def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto),
+ (inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
+ def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto),
+ (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+ def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto),
+ (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+
+ def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0),
+ (inst_uni (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
+ def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0),
+ (inst_uni $addr, imm:$rets, imm:$params)>;
+ def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 0),
+ (inst_uni $addr, imm:$rets, imm:$params)>;
}
def LoadParamMemI64 : LoadParamMemInst<B64, ".b64">;
@@ -2229,41 +2216,23 @@ def StoreRetvalV4I32 : StoreRetvalV4Inst<B32, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<B16, ".b16">;
def StoreRetvalV4I8 : StoreRetvalV4Inst<B16, ".b8">;
-def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
-def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
-def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
-
-def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
- [(CallArg 1, imm:$a)]>;
-def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
- [(LastCallArg 1, imm:$a)]>;
-
-def CALL_ADDR : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", []>;
-
-def : Pat<(CallVoid globaladdr:$addr), (CALL_ADDR (to_tglobaladdr $addr))>;
-def : Pat<(CallVoid i32:$addr), (CALL_ADDR $addr)>;
-def : Pat<(CallVoid i64:$addr), (CALL_ADDR $addr)>;
-
-def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
- [(Prototype (i32 imm:$val))]>;
-
def DeclareRetMemInst :
- NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
- ".param .align $align .b8 retval$num[$size];",
- [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
+ NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size),
+ ".param .align $align .b8 retval0[$size];",
+ [(DeclareRetParam imm:$align, imm:$size)]>;
def DeclareRetScalarInst :
- NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
- ".param .b$size retval$num;",
- [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
+ NVPTXInst<(outs), (ins i32imm:$size),
+ ".param .b$size retval0;",
+ [(DeclareRet imm:$size)]>;
def DeclareParamInst :
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
".param .align $align .b8 param$a[$size];",
- [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
+ [(DeclareParam imm:$align, imm:$a, imm:$size)]>;
def DeclareScalarParamInst :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
".param .b$size param$a;",
- [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
+ [(DeclareScalarParam imm:$a, imm:$size)]>;
foreach t = [I32RT, I64RT] in {
defvar inst_name = "MOV" # t.Size # "_PARAM";
diff --git a/llvm/test/CodeGen/NVPTX/alias.ll b/llvm/test/CodeGen/NVPTX/alias.ll
index 8ae29b51290ef..01761c21ab103 100644
--- a/llvm/test/CodeGen/NVPTX/alias.ll
+++ b/llvm/test/CodeGen/NVPTX/alias.ll
@@ -56,8 +56,7 @@ attributes #0 = { noreturn }
; CHECK-NEXT: .noreturn
; CHECK: .visible .func (.param .b32 func_retval0) z()
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: b,
+; CHECK: call.uni (retval0), b,
; CHECK: .alias b, a;
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index 6f115756a8ae7..01e4065a7baa7 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -216,12 +216,7 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
index 2af1e6d7e185b..ad9e4b089e8d8 100644
--- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
@@ -19,11 +19,7 @@ define void @foo() {
; CHECK-NEXT: .param .align 8 .b8 param0[16];
; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: bar,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni bar, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @bar(ptr byval(%struct) @G)
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index a2175dd009f5f..0cd7058174d67 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -48,8 +48,7 @@ entry:
; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]]
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]]
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: callee,
+; CHECK-NEXT: call.uni callee,
call void @callee(ptr %a, ptr %buf) #2
ret void
diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll
index dc6d504c2c66c..2232810d02128 100644
--- a/llvm/test/CodeGen/NVPTX/combine-mad.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll
@@ -203,12 +203,7 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: .param .b32 param1;
; CHECK-NEXT: st.param.b32 [param1], %r5;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: use,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), use, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
diff --git a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
index 5e85bf4554546..39a2d7f9e1504 100644
--- a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
+++ b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
@@ -9,18 +9,16 @@ declare void @conv() convergent
declare void @not_conv()
define void @test(ptr %f) {
- ; CHECK: ConvergentCallUniPrintCall
- ; CHECK-NEXT: @conv
+ ; CHECK: CALL_UNI_conv @conv
call void @conv()
- ; CHECK: CallUniPrintCall
- ; CHECK-NEXT: @not_conv
+ ; CHECK: CALL_UNI @not_conv
call void @not_conv()
- ; CHECK: ConvergentCallPrintCall
+ ; CHECK: CALL_conv %{{[0-9]+}}
call void %f() convergent
- ; CHECK: CallPrintCall
+ ; CHECK: CALL %{{[0-9]+}}
call void %f()
ret void
diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
index 71a46fa6d4820..d1b478d341915 100644
--- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
@@ -9,12 +9,7 @@ define %struct.64 @test_return_type_mismatch(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch(
; CHECK: .param .align 1 .b8 retval0[8];
; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0),
-; CHECK-NEXT: %rd
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: )
-; CHECK-NEXT: , prototype_0;
+; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0;
%ret = call %struct.64 @callee(ptr %p)
ret %struct.64 %ret
}
@@ -23,12 +18,7 @@ define i64 @test_param_type_mismatch(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch(
; CHECK: .param .b64 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0),
-; CHECK-NEXT: %rd
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: )
-; CHECK-NEXT: , prototype_1;
+; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1;
%ret = call i64 @callee(i64 7)
ret i64 %ret
}
@@ -37,13 +27,7 @@ define i64 @test_param_count_mismatch(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch(
; CHECK: .param .b64 retval0;
; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
-; CHECK-NEXT: call (retval0),
-; CHECK-NEXT: %rd
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: )
-; CHECK-NEXT: , prototype_2;
+; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2;
%ret = call i64 @callee(ptr %p, i64 7)
ret i64 %ret
}
@@ -52,12 +36,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch_variadic(
; CHECK: .param .align 1 .b8 retval0[8];
; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0),
-; CHECK-NEXT: %rd
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: )
-; CHECK-NEXT: , prototype_3;
+; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3;
%ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p)
ret %struct.64 %ret
}
@@ -65,12 +44,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
define i64 @test_param_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch_variadic(
; CHECK: .param .b64 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: callee_variadic
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: )
+; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}
@@ -78,12 +52,7 @@ define i64 @test_param_type_mismatch_variadic(ptr %p) {
define i64 @test_param_count_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch_variadic(
; CHECK: .param .b64 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: callee_variadic
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: )
+; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index b73aea76a4528..4d2ba7d00f872 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -24,11 +24,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-32-NEXT: .param .b32 param0;
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: .param .b32 retval0;
-; CHECK-32-NEXT: call.uni (retval0),
-; CHECK-32-NEXT: bar,
-; CHECK-32-NEXT: (
-; CHECK-32-NEXT: param0
-; CHECK-32-NEXT: );
+; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-32-NEXT: } // callseq 0
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
@@ -49,11 +45,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-64-NEXT: .param .b64 param0;
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: .param .b32 retval0;
-; CHECK-64-NEXT: call.uni (retval0),
-; CHECK-64-NEXT: bar,
-; CHECK-64-NEXT: (
-; CHECK-64-NEXT: param0
-; CHECK-64-NEXT: );
+; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-64-NEXT: } // callseq 0
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index c905fc04ce780..252edf4b02c76 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -263,12 +263,7 @@ declare half @test_callee(half %a, half %b) #0
; CHECK-DAG: st.param.b16 [param0], [[A]];
; CHECK-DAG: st.param.b16 [param1], [[B]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -287,12 +282,7 @@ define half @test_call(half %a, half %b) #0 {
; CHECK-DAG: st.param.b16 [param0], [[B]];
; CHECK-DAG: st.param.b16 [param1], [[A]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -311,12 +301,7 @@ define half @test_call_flipped(half %a, half %b) #0 {
; CHECK-DAG: st.param.b16 [param0], [[B]];
; CHECK-DAG: st.param.b16 [param1], [[A]];
; CHECK-DAG: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -650,8 +635,7 @@ else:
; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]];
; CHECK: {
; CHECK: st.param.b64 [param0], %[[P1]];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_dummy
+; CHECK: call.uni (retval0), test_dummy
; CHECK: }
; CHECK: setp.ne.b32 [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: @[[PRED]] bra [[LOOP]];
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index fc7f53c5fdca3..8da2c1d1ebac2 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -467,12 +467,7 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -495,12 +490,7 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -523,12 +513,7 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index 327851725991e..b74e531adba3f 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -40,12 +40,7 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
; CHECK-NEXT: .param .b32 param1;
; CHECK-NEXT: st.param.b32 [param1], %r6;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: dummy_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r7, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
@@ -92,12 +87,7 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: st.param.b64 [param1], %rd6;
; CHECK-NEXT: .param .b64 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: dummy_f64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd7, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b64 [func_retval0], %rd7;
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index d253df5ed1b9c..ed8f6b4511079 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -50,11 +50,7 @@ define void @test_ld_param_escaping(ptr byval(i32) %a) {
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd2;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: escape,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni escape, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @escape(ptr %a)
@@ -72,11 +68,7 @@ define void @test_ld_param_byval(ptr byval(i32) %a) {
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: byval_user,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni byval_user, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
call void @byval_user(ptr %a)
diff --git a/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll b/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll
index d40f514acd408..de69d02ded5e4 100644
--- a/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll
+++ b/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll
@@ -42,11 +42,7 @@ define void @call(fp128 %x) {
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, %rd2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @call(fp128 %x)
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index bf1fb06c44688..d5ddadf2b21c5 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -647,12 +647,7 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: .param .align 4 .b8 param1[4];
; COMMON-NEXT: st.param.b32 [param1], %r2;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
-; COMMON-NEXT: call.uni (retval0),
-; COMMON-NEXT: test_callee,
-; COMMON-NEXT: (
-; COMMON-NEXT: param0,
-; COMMON-NEXT: param1
-; COMMON-NEXT: );
+; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 0
; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
@@ -675,12 +670,7 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: .param .align 4 .b8 param1[4];
; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
-; COMMON-NEXT: call.uni (retval0),
-; COMMON-NEXT: test_callee,
-; COMMON-NEXT: (
-; COMMON-NEXT: param0,
-; COMMON-NEXT: param1
-; COMMON-NEXT: );
+; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 1
; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
@@ -703,12 +693,7 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: .param .align 4 .b8 param1[4];
; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
-; COMMON-NEXT: call.uni (retval0),
-; COMMON-NEXT: test_callee,
-; COMMON-NEXT: (
-; COMMON-NEXT: param0,
-; COMMON-NEXT: param1
-; COMMON-NEXT: );
+; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 2
; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index 7cc7468bc7de7..72c279bee4268 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -833,12 +833,7 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -861,12 +856,7 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
@@ -889,12 +879,7 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: .param .align 4 .b8 param1[4];
; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index 1341a04c939c6..eae0321433946 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -33,13 +33,7 @@ define internal i32 @foo() {
; CHECK-NEXT: st.param.b64 [param1], %rd4;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
-; CHECK-NEXT: call (retval0),
-; CHECK-NEXT: %rd1,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: )
-; CHECK-NEXT: , prototype_0;
+; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
@@ -76,13 +70,7 @@ define internal i32 @bar() {
; CHECK-NEXT: st.param.b64 [param1], %rd5;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
-; CHECK-NEXT: call (retval0),
-; CHECK-NEXT: %rd1,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0,
-; CHECK-NEXT: param1
-; CHECK-NEXT: )
-; CHECK-NEXT: , prototype_1;
+; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
index 419c780f7d82a..9e9705709f2bd 100644
--- a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
+++ b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
@@ -14,10 +14,7 @@ define void @foo(ptr %ptr) {
; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 retval0[16];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: bar,
-; CHECK-NEXT: (
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), bar, ();
; CHECK-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.v4.b32 [%rd1], {%r1, %r2, %r3, %r4};
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 2bfd891a04a17..a9004d00e7807 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -58,11 +58,7 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX32-NEXT: { // callseq 0, 0
; PTX32-NEXT: .param .b32 param0;
; PTX32-NEXT: st.param.b32 [param0], %r2;
-; PTX32-NEXT: call.uni
-; PTX32-NEXT: bar,
-; PTX32-NEXT: (
-; PTX32-NEXT: param0
-; PTX32-NEXT: );
+; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 0
; PTX32-NEXT: ret;
;
@@ -84,11 +80,7 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX64-NEXT: { // callseq 0, 0
; PTX64-NEXT: .param .b64 param0;
; PTX64-NEXT: st.param.b64 [param0], %rd1;
-; PTX64-NEXT: call.uni
-; PTX64-NEXT: bar,
-; PTX64-NEXT: (
-; PTX64-NEXT: param0
-; PTX64-NEXT: );
+; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 0
; PTX64-NEXT: ret;
%local = alloca i32, align 4
@@ -159,20 +151,12 @@ define void @foo4() {
; PTX32-NEXT: { // callseq 1, 0
; PTX32-NEXT: .param .b32 param0;
; PTX32-NEXT: st.param.b32 [param0], %r1;
-; PTX32-NEXT: call.uni
-; PTX32-NEXT: bar,
-; PTX32-NEXT: (
-; PTX32-NEXT: param0
-; PTX32-NEXT: );
+; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 1
; PTX32-NEXT: { // callseq 2, 0
; PTX32-NEXT: .param .b32 param0;
; PTX32-NEXT: st.param.b32 [param0], %r3;
-; PTX32-NEXT: call.uni
-; PTX32-NEXT: bar,
-; PTX32-NEXT: (
-; PTX32-NEXT: param0
-; PTX32-NEXT: );
+; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 2
; PTX32-NEXT: ret;
;
@@ -197,20 +181,12 @@ define void @foo4() {
; PTX64-NEXT: { // callseq 1, 0
; PTX64-NEXT: .param .b64 param0;
; PTX64-NEXT: st.param.b64 [param0], %rd1;
-; PTX64-NEXT: call.uni
-; PTX64-NEXT: bar,
-; PTX64-NEXT: (
-; PTX64-NEXT: param0
-; PTX64-NEXT: );
+; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 1
; PTX64-NEXT: { // callseq 2, 0
; PTX64-NEXT: .param .b64 param0;
; PTX64-NEXT: st.param.b64 [param0], %rd3;
-; PTX64-NEXT: call.uni
-; PTX64-NEXT: bar,
-; PTX64-NEXT: (
-; PTX64-NEXT: param0
-; PTX64-NEXT: );
+; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 2
; PTX64-NEXT: ret;
%A = alloca i32
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index c3f94455b3038..0a2cd81ac904c 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -133,12 +133,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: st.param.b64 [param0], %rd3;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT: call (retval0),
-; PTX-NEXT: %rd1,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: )
-; PTX-NEXT: , prototype_0;
+; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0;
; PTX-NEXT: ld.param.b32 %r1, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
@@ -182,14 +177,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: st.param.b64 [param2], %rd4;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
-; PTX-NEXT: call (retval0),
-; PTX-NEXT: %rd1,
-; PTX-NEXT: (
-; PTX-NEXT: param0,
-; PTX-NEXT: param1,
-; PTX-NEXT: param2
-; PTX-NEXT: )
-; PTX-NEXT: , prototype_1;
+; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1;
; PTX-NEXT: ld.param.b32 %r2, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
@@ -284,12 +272,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT: call (retval0),
-; PTX-NEXT: %rd1,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: )
-; PTX-NEXT: , prototype_2;
+; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2;
; PTX-NEXT: ld.param.b32 %r3, [retval0];
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
@@ -330,12 +313,7 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT: call (retval0),
-; PTX-NEXT: %rd1,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: )
-; PTX-NEXT: , prototype_3;
+; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3;
; PTX-NEXT: ld.param.b32 %r4, [retval0];
; PTX-NEXT: } // callseq 3
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
@@ -561,11 +539,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-NEXT: { // callseq 4, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
; PTX-NEXT: st.param.b32 [param0], %r1;
-; PTX-NEXT: call.uni
-; PTX-NEXT: device_func,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 4
; PTX-NEXT: ret;
call void @device_func(ptr byval(i32) align 4 %input)
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 246408ecf6a3a..6f334b075241b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes IR,IRC
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes IR,IRO
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX,PTXC
@@ -47,11 +47,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b64 retval0;
-; PTX-NEXT: call.uni (retval0),
-; PTX-NEXT: escape,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: call.uni (retval0), escape, (param0);
; PTX-NEXT: ld.param.b64 %rd6, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
@@ -89,11 +85,7 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: .param .b64 retval0;
-; PTX-NEXT: call.uni (retval0),
-; PTX-NEXT: escape,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: call.uni (retval0), escape, (param0);
; PTX-NEXT: ld.param.b64 %rd3, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 54495cf0d61f3..d268562914755 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -153,11 +153,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd1;
-; PTX-NEXT: call.uni
-; PTX-NEXT: _Z6escapePv,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
entry:
@@ -198,11 +194,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd3;
-; PTX-NEXT: call.uni
-; PTX-NEXT: _Z6escapePv,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
entry:
@@ -902,11 +894,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
; PTX-NEXT: st.param.b32 [param0], %r1;
-; PTX-NEXT: call.uni
-; PTX-NEXT: device_func,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
call void @device_func(ptr byval(i32) align 4 %input)
@@ -929,11 +917,7 @@ define void @device_func(ptr byval(i32) align 4 %input) {
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
; PTX-NEXT: st.param.b32 [param0], %r1;
-; PTX-NEXT: call.uni
-; PTX-NEXT: device_func,
-; PTX-NEXT: (
-; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 3
; PTX-NEXT: ret;
call void @device_func(ptr byval(i32) align 4 %input)
diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
index 7e907990147a5..2e9eb6913ac0e 100644
--- a/llvm/test/CodeGen/NVPTX/misched_func_call.ll
+++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
@@ -21,11 +21,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000;
; CHECK-NEXT: .param .b64 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: quux,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), quux, (param0);
; CHECK-NEXT: ld.param.b64 %rd1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;
diff --git a/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll
index a1f0577c2218b..448960181ae42 100644
--- a/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll
+++ b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll
@@ -11,10 +11,7 @@ define dso_local void @naked() naked "frame-pointer"="all" {
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: { // callseq 0, 0
-; CHECK-32-NEXT: call.uni
-; CHECK-32-NEXT: main,
-; CHECK-32-NEXT: (
-; CHECK-32-NEXT: );
+; CHECK-32-NEXT: call.uni main, ();
; CHECK-32-NEXT: } // callseq 0
; CHECK-32-NEXT: // begin inline asm
; CHECK-32-NEXT: exit;
@@ -26,10 +23,7 @@ define dso_local void @naked() naked "frame-pointer"="all" {
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: { // callseq 0, 0
-; CHECK-64-NEXT: call.uni
-; CHECK-64-NEXT: main,
-; CHECK-64-NEXT: (
-; CHECK-64-NEXT: );
+; CHECK-64-NEXT: call.uni main, ();
; CHECK-64-NEXT: } // callseq 0
; CHECK-64-NEXT: // begin inline asm
; CHECK-64-NEXT: exit;
@@ -45,10 +39,7 @@ define dso_local void @normal() "frame-pointer"="all" {
; CHECK-32-EMPTY:
; CHECK-32-NEXT: // %bb.0:
; CHECK-32-NEXT: { // callseq 1, 0
-; CHECK-32-NEXT: call.uni
-; CHECK-32-NEXT: main,
-; CHECK-32-NEXT: (
-; CHECK-32-NEXT: );
+; CHECK-32-NEXT: call.uni main, ();
; CHECK-32-NEXT: } // callseq 1
; CHECK-32-NEXT: // begin inline asm
; CHECK-32-NEXT: exit;
@@ -60,10 +51,7 @@ define dso_local void @normal() "frame-pointer"="all" {
; CHECK-64-EMPTY:
; CHECK-64-NEXT: // %bb.0:
; CHECK-64-NEXT: { // callseq 1, 0
-; CHECK-64-NEXT: call.uni
-; CHECK-64-NEXT: main,
-; CHECK-64-NEXT: (
-; CHECK-64-NEXT: );
+; CHECK-64-NEXT: call.uni main, ();
; CHECK-64-NEXT: } // callseq 1
; CHECK-64-NEXT: // begin inline asm
; CHECK-64-NEXT: exit;
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index 4fc8786c1e2fe..cd2664e913824 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -37,11 +37,7 @@ define i32 @test(%struct.1float alignstack(32) %data) {
; CHECK-NEXT: st.param.b8 [param0+2], %r12;
; CHECK-NEXT: st.param.b8 [param0+3], %r13;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r14, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index 4bea710e6dd93..263477df1dbfe 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -32,8 +32,7 @@
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[C]]
; CHECK: .param .b32 retval0;
-; CHECK: call.uni
-; CHECK-NEXT: test_i1,
+; CHECK: call.uni (retval0), test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -76,8 +75,7 @@ define signext i1 @test_i1s(i1 signext %a) {
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
; CHECK: .param .align 1 .b8 retval0[1];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i1,
+; CHECK: call.uni (retval0), test_v3i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]]
@@ -95,8 +93,7 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) {
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[E0]];
; CHECK: .param .align 1 .b8 retval0[1];
-; CHECK: call.uni (retval0),
-; CHECK: test_v4i1,
+; CHECK: call.uni (retval0), test_v4i1,
; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
@@ -120,8 +117,7 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) {
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 1 .b8 retval0[1];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i1,
+; CHECK: call.uni (retval0), test_v5i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.b8 [func_retval0], [[RE0]]
@@ -139,8 +135,7 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK: test_i2,
+; CHECK: call.uni (retval0), test_i2,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -156,8 +151,7 @@ define i2 @test_i2(i2 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK: test_i3,
+; CHECK: call.uni (retval0), test_i3,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -176,8 +170,7 @@ define i3 @test_i3(i3 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK: test_i8,
+; CHECK: call.uni (retval0), test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -196,8 +189,7 @@ define i8 @test_i8(i8 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK: test_i8s,
+; CHECK: call.uni (retval0), test_i8s,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
@@ -216,8 +208,7 @@ define signext i8 @test_i8s(i8 signext %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[R]]
; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i8,
+; CHECK: call.uni (retval0), test_v3i8,
; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
; interesting here, so it's skipped.
@@ -235,8 +226,7 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[R]];
; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i8,
+; CHECK: call.uni (retval0), test_v4i8,
; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[RET]];
; CHECK-NEXT: ret;
@@ -254,8 +244,7 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
; CHECK-DAG: st.param.v4.b8 [param0],
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i8,
+; CHECK: call.uni (retval0), test_v5i8,
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v4.b8 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -272,8 +261,7 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) {
; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0];
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i11,
+; CHECK: call.uni (retval0), test_i11,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -290,8 +278,7 @@ define i11 @test_i11(i11 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i16,
+; CHECK: call.uni (retval0), test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -309,8 +296,7 @@ define i16 @test_i16(i16 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i16s,
+; CHECK: call.uni (retval0), test_i16s,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -329,8 +315,7 @@ define signext i16 @test_i16s(i16 signext %a) {
; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i16,
+; CHECK: call.uni (retval0), test_v3i16,
; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
@@ -348,8 +333,7 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) {
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i16,
+; CHECK: call.uni (retval0), test_v4i16,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
; CHECK-NEXT: ret;
@@ -367,8 +351,7 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i16,
+; CHECK: call.uni (retval0), test_v5i16,
; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -386,8 +369,7 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_f16,
+; CHECK: call.uni (retval0), test_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -403,8 +385,7 @@ define half @test_f16(half %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v2f16,
+; CHECK: call.uni (retval0), test_v2f16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -420,8 +401,7 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_bf16,
+; CHECK: call.uni (retval0), test_bf16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -437,8 +417,7 @@ define bfloat @test_bf16(bfloat %a) {
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v2bf16,
+; CHECK: call.uni (retval0), test_v2bf16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
; CHECK-NEXT: ret;
@@ -457,8 +436,7 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK: test_v3f16,
+; CHECK: call.uni (retval0), test_v3f16,
; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
@@ -476,8 +454,7 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK: test_v4f16,
+; CHECK: call.uni (retval0), test_v4f16,
; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
; CHECK: ret;
@@ -495,8 +472,7 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
; CHECK-DAG: st.param.v4.b16 [param0],
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK: test_v5f16,
+; CHECK: call.uni (retval0), test_v5f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
@@ -514,8 +490,7 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK: test_v8f16,
+; CHECK: call.uni (retval0), test_v8f16,
; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
; CHECK: ret;
@@ -535,8 +510,7 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
; CHECK-DAG: st.param.v4.b16 [param0+8],
; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK: test_v9f16,
+; CHECK: call.uni (retval0), test_v9f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
@@ -557,8 +531,7 @@ define <9 x half> @test_v9f16(<9 x half> %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i19,
+; CHECK: call.uni (retval0), test_i19,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -575,8 +548,7 @@ define i19 @test_i19(i19 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i23,
+; CHECK: call.uni (retval0), test_i23,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -593,8 +565,7 @@ define i23 @test_i23(i23 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i24,
+; CHECK: call.uni (retval0), test_i24,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -610,8 +581,7 @@ define i24 @test_i24(i24 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i29,
+; CHECK: call.uni (retval0), test_i29,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
@@ -627,8 +597,7 @@ define i29 @test_i29(i29 %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i32,
+; CHECK: call.uni (retval0), test_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -646,8 +615,7 @@ define i32 @test_i32(i32 %a) {
; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b32 [param0+8], [[E2]];
; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i32,
+; CHECK: call.uni (retval0), test_v3i32,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
@@ -665,8 +633,7 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) {
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 16 .b8 retval0[16];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i32,
+; CHECK: call.uni (retval0), test_v4i32,
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
@@ -684,8 +651,7 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v5i32,
+; CHECK: call.uni (retval0), test_v5i32,
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -703,8 +669,7 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) {
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_f32,
+; CHECK: call.uni (retval0), test_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -721,8 +686,7 @@ define float @test_f32(float %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i40,
+; CHECK: call.uni (retval0), test_i40,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -739,8 +703,7 @@ define i40 @test_i40(i40 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i47,
+; CHECK: call.uni (retval0), test_i47,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -757,8 +720,7 @@ define i47 @test_i47(i47 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i48,
+; CHECK: call.uni (retval0), test_i48,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -776,8 +738,7 @@ define i48 @test_i48(i48 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i51,
+; CHECK: call.uni (retval0), test_i51,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -795,8 +756,7 @@ define i51 @test_i51(i51 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i56,
+; CHECK: call.uni (retval0), test_i56,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -812,8 +772,7 @@ define i56 @test_i56(i56 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i57,
+; CHECK: call.uni (retval0), test_i57,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
@@ -829,8 +788,7 @@ define i57 @test_i57(i57 %a) {
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .b64 retval0;
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_i64,
+; CHECK: call.uni (retval0), test_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -848,8 +806,7 @@ define i64 @test_i64(i64 %a) {
; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.b64 [param0+16], [[E2]];
; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v3i64,
+; CHECK: call.uni (retval0), test_v3i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RE0]], [[RE1]]};
@@ -872,8 +829,7 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) {
; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK: .param .align 32 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_v4i64,
+; CHECK: call.uni (retval0), test_v4i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
@@ -893,8 +849,7 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) {
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_i1,
+; CHECK: call.uni (retval0), test_s_i1,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -910,8 +865,7 @@ define %s_i1 @test_s_i1(%s_i1 %a) {
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_i8,
+; CHECK: call.uni (retval0), test_s_i8,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -927,8 +881,7 @@ define %s_i8 @test_s_i8(%s_i8 %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_i16,
+; CHECK: call.uni (retval0), test_s_i16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -944,8 +897,7 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
-; CHECK: call.uni
-; CHECK-NEXT: test_s_f16,
+; CHECK: call.uni (retval0), test_s_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -961,8 +913,7 @@ define %s_f16 @test_s_f16(%s_f16 %a) {
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i32,
+; CHECK: call.uni (retval0), test_s_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -978,8 +929,7 @@ define %s_i32 @test_s_i32(%s_i32 %a) {
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_f32,
+; CHECK: call.uni (retval0), test_s_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -995,8 +945,7 @@ define %s_f32 @test_s_f32(%s_f32 %a) {
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .align 8 .b8 retval0[8];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i64,
+; CHECK: call.uni (retval0), test_s_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
; CHECK-NEXT: ret;
@@ -1021,8 +970,7 @@ define %s_i64 @test_s_i64(%s_i64 %a) {
; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i32f32,
+; CHECK: call.uni (retval0), test_s_i32f32,
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4];
; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
@@ -1051,8 +999,7 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
; CHECK: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i32x4,
+; CHECK: call.uni (retval0), test_s_i32x4,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
@@ -1081,8 +1028,7 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
; CHECK: st.param.b32 [param0+16], [[E4]];
; CHECK: st.param.b64 [param0+24], [[E5]];
; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK: call.uni (retval0),
-; CHECK: test_s_i1i32x4,
+; CHECK: call.uni (retval0), test_s_i1i32x4,
; CHECK: (
; CHECK: param0
; CHECK: );
@@ -1160,8 +1106,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
; CHECK-DAG: st.param.b8 [param0+23],
; CHECK-DAG: st.param.b8 [param0+24],
; CHECK: .param .align 1 .b8 retval0[25];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_s_i1i32x4p,
+; CHECK: call.uni (retval0), test_s_i1i32x4p,
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
@@ -1237,8 +1182,7 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
; CHECK: st.param.b32 [param0+64], [[E15]];
; CHECK: .param .align 16 .b8 retval0[80];
-; CHECK: call.uni (retval0),
-; CHECK: test_s_crossfield,
+; CHECK: call.uni (retval0), test_s_crossfield,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll
index 22a648c7a9786..f490c5f73d425 100644
--- a/llvm/test/CodeGen/NVPTX/param-overalign.ll
+++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
@@ -18,27 +19,23 @@ target triple = "nvptx64-nvidia-cuda"
; CHECK-NEXT: ;
define float @caller_md(float %a, float %b) {
-; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller_md(
-; CHECK-NEXT: .param .b32 caller_md_param_0,
-; CHECK-NEXT: .param .b32 caller_md_param_1
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK: ld.param.b32 %r1, [caller_md_param_0];
+; CHECK-LABEL: caller_md(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [caller_md_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1];
-; CHECK-NEXT: {
+; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: callee_md,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), callee_md, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
-; CHECK-NEXT: }
+; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
+
%s1 = insertvalue %struct.float2 poison, float %a, 0
%s2 = insertvalue %struct.float2 %s1, float %b, 1
%r = call float @callee_md(%struct.float2 %s2)
@@ -46,15 +43,16 @@ define float @caller_md(float %a, float %b) {
}
define float @callee_md(%struct.float2 alignstack(8) %a) {
-; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee_md(
-; CHECK-NEXT: .param .align 8 .b8 callee_md_param_0[8]
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK: ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
+; CHECK-LABEL: callee_md(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
+
%v0 = extractvalue %struct.float2 %a, 0
%v1 = extractvalue %struct.float2 %a, 1
%2 = fadd float %v0, %v1
@@ -62,27 +60,23 @@ define float @callee_md(%struct.float2 alignstack(8) %a) {
}
define float @caller(float %a, float %b) {
-; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller(
-; CHECK-NEXT: .param .b32 caller_param_0,
-; CHECK-NEXT: .param .b32 caller_param_1
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK: ld.param.b32 %r1, [caller_param_0];
+; CHECK-LABEL: caller(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [caller_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1];
-; CHECK-NEXT: {
+; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: callee,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
-; CHECK-NEXT: }
+; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
+
%s1 = insertvalue %struct.float2 poison, float %a, 0
%s2 = insertvalue %struct.float2 %s1, float %b, 1
%r = call float @callee(%struct.float2 %s2)
@@ -90,15 +84,16 @@ define float @caller(float %a, float %b) {
}
define float @callee(%struct.float2 alignstack(8) %a ) {
-; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee(
-; CHECK-NEXT: .param .align 8 .b8 callee_param_0[8]
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK: ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
+; CHECK-LABEL: callee(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
; CHECK-NEXT: add.rn.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
+
%v0 = extractvalue %struct.float2 %a, 0
%v1 = extractvalue %struct.float2 %a, 1
%2 = fadd float %v0, %v1
@@ -106,9 +101,15 @@ define float @callee(%struct.float2 alignstack(8) %a ) {
}
define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) {
-; CHECK-LABEL: .visible .func (.param .align 8 .b8 func_retval0[8]) aligned_return(
-; CHECK-NEXT: .param .align 4 .b8 aligned_return_param_0[8]
-; CHECK-NEXT: )
-; CHECK-NEXT: {
+; CHECK-LABEL: aligned_return(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0+4];
+; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0];
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0+4], %r1;
+; CHECK-NEXT: ret;
ret %struct.float2 %a
}
diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
index abb1aff867754..892e49a5fe82a 100644
--- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
+++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
@@ -86,11 +86,7 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[4];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x1,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x1, (param0);
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
%1 = load i32, ptr %in, align 4
%call = tail call fastcc [1 x i32] @callee_St4x1(i32 %1) #2
@@ -118,11 +114,7 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .align 16 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[8];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x2,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x2, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%agg.tmp = alloca %struct.St4x2, align 8
%1 = load i64, ptr %in, align 4
@@ -160,11 +152,7 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[12];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x3,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x3, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8];
%call = tail call fastcc [3 x i32] @callee_St4x3(ptr noundef nonnull byval(%struct.St4x3) align 4 %in) #2
@@ -207,11 +195,7 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x4,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x4, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2
%.fca.0.extract = extractvalue [4 x i32] %call, 0
@@ -258,11 +242,7 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[20];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x5,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x5, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16];
%call = tail call fastcc [5 x i32] @callee_St4x5(ptr noundef nonnull byval(%struct.St4x5) align 4 %in) #2
@@ -318,11 +298,7 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[24];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x6,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x6, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
%call = tail call fastcc [6 x i32] @callee_St4x6(ptr noundef nonnull byval(%struct.St4x6) align 4 %in) #2
@@ -385,11 +361,7 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[28];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x7,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x7, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+24];
@@ -460,11 +432,7 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St4x8,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St4x8, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
%call = tail call fastcc [8 x i32] @callee_St4x8(ptr noundef nonnull byval(%struct.St4x8) align 4 %in) #2
@@ -537,11 +505,7 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[8];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St8x1,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St8x1, (param0);
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
%1 = load i64, ptr %in, align 8
%call = tail call fastcc [1 x i64] @callee_St8x1(i64 %1) #2
@@ -569,11 +533,7 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St8x2,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St8x2, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
%call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2
%.fca.0.extract = extractvalue [2 x i64] %call, 0
@@ -608,11 +568,7 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[24];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St8x3,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St8x3, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16];
%call = tail call fastcc [3 x i64] @callee_St8x3(ptr noundef nonnull byval(%struct.St8x3) align 8 %in) #2
@@ -656,11 +612,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
- ; CHECK: call.uni (retval0),
- ; CHECK-NEXT: callee_St8x4,
- ; CHECK-NEXT: (
- ; CHECK-NEXT: param0
- ; CHECK-NEXT: );
+ ; CHECK: call.uni (retval0), callee_St8x4, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16];
%call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index b165b4cb4b262..f0813609268e9 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -131,11 +131,7 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd3;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: use,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni use, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
@@ -164,11 +160,7 @@ define i64 @test_negative_use_shl(i64 %x, i32 %y) {
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd2;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: use,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni use, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index bdab9958fe2b2..50d3e8049a947 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -28,11 +28,7 @@ define void @st_param_i8_i16() {
; CHECK-NEXT: .param .align 2 .b8 param0[4];
; CHECK-NEXT: st.param.b8 [param0], 1;
; CHECK-NEXT: st.param.b16 [param0+2], 2;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_i8_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_i8_i16, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @call_i8_i16(%struct.A { i8 1, i16 2 })
@@ -48,11 +44,7 @@ define void @st_param_i32() {
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b32 param0;
; CHECK-NEXT: st.param.b32 [param0], 3;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_i32, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
call void @call_i32(i32 3)
@@ -68,11 +60,7 @@ define void @st_param_i64() {
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], 4;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_i64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_i64, (param0);
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: ret;
call void @call_i64(i64 4)
@@ -88,11 +76,7 @@ define void @st_param_f32() {
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .b32 param0;
; CHECK-NEXT: st.param.b32 [param0], 0f40A00000;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_f32, (param0);
; CHECK-NEXT: } // callseq 3
; CHECK-NEXT: ret;
call void @call_f32(float 5.0)
@@ -108,11 +92,7 @@ define void @st_param_f64() {
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], 0d4018000000000000;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_f64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_f64, (param0);
; CHECK-NEXT: } // callseq 4
; CHECK-NEXT: ret;
call void @call_f64(double 6.0)
@@ -134,11 +114,7 @@ define void @st_param_v2_i8_ii() {
; CHECK-NEXT: { // callseq 5, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
; CHECK-NEXT: st.param.v2.b8 [param0], {1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i8, (param0);
; CHECK-NEXT: } // callseq 5
; CHECK-NEXT: ret;
call void @call_v2_i8(%struct.char2 { i8 1, i8 2 })
@@ -154,11 +130,7 @@ define void @st_param_v2_i8_ir(i8 %val) {
; CHECK-NEXT: { // callseq 6, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
; CHECK-NEXT: st.param.v2.b8 [param0], {1, %rs1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i8, (param0);
; CHECK-NEXT: } // callseq 6
; CHECK-NEXT: ret;
%struct.ir0 = insertvalue %struct.char2 poison, i8 1, 0
@@ -176,11 +148,7 @@ define void @st_param_v2_i8_ri(i8 %val) {
; CHECK-NEXT: { // callseq 7, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
; CHECK-NEXT: st.param.v2.b8 [param0], {%rs1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i8, (param0);
; CHECK-NEXT: } // callseq 7
; CHECK-NEXT: ret;
%struct.ri0 = insertvalue %struct.char2 poison, i8 %val, 0
@@ -198,11 +166,7 @@ define void @st_param_v2_i16_ii() {
; CHECK-NEXT: { // callseq 8, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i16, (param0);
; CHECK-NEXT: } // callseq 8
; CHECK-NEXT: ret;
call void @call_v2_i16(%struct.short2 { i16 1, i16 2 })
@@ -218,11 +182,7 @@ define void @st_param_v2_i16_ir(i16 %val) {
; CHECK-NEXT: { // callseq 9, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v2.b16 [param0], {1, %rs1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i16, (param0);
; CHECK-NEXT: } // callseq 9
; CHECK-NEXT: ret;
%struct.ir0 = insertvalue %struct.short2 poison, i16 1, 0
@@ -240,11 +200,7 @@ define void @st_param_v2_i16_ri(i16 %val) {
; CHECK-NEXT: { // callseq 10, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v2.b16 [param0], {%rs1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i16, (param0);
; CHECK-NEXT: } // callseq 10
; CHECK-NEXT: ret;
%struct.ri0 = insertvalue %struct.short2 poison, i16 %val, 0
@@ -262,11 +218,7 @@ define void @st_param_v2_i32_ii() {
; CHECK-NEXT: { // callseq 11, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i32, (param0);
; CHECK-NEXT: } // callseq 11
; CHECK-NEXT: ret;
call void @call_v2_i32(%struct.int2 { i32 1, i32 2 })
@@ -282,11 +234,7 @@ define void @st_param_v2_i32_ir(i32 %val) {
; CHECK-NEXT: { // callseq 12, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {1, %r1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i32, (param0);
; CHECK-NEXT: } // callseq 12
; CHECK-NEXT: ret;
%struct.ir0 = insertvalue %struct.int2 poison, i32 1, 0
@@ -304,11 +252,7 @@ define void @st_param_v2_i32_ri(i32 %val) {
; CHECK-NEXT: { // callseq 13, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i32, (param0);
; CHECK-NEXT: } // callseq 13
; CHECK-NEXT: ret;
%struct.ri0 = insertvalue %struct.int2 poison, i32 %val, 0
@@ -326,11 +270,7 @@ define void @st_param_v2_i64_ii() {
; CHECK-NEXT: { // callseq 14, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i64, (param0);
; CHECK-NEXT: } // callseq 14
; CHECK-NEXT: ret;
call void @call_v2_i64(%struct.longlong2 { i64 1, i64 2 })
@@ -346,11 +286,7 @@ define void @st_param_v2_i64_ir(i64 %val) {
; CHECK-NEXT: { // callseq 15, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {1, %rd1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i64, (param0);
; CHECK-NEXT: } // callseq 15
; CHECK-NEXT: ret;
%struct.ir0 = insertvalue %struct.longlong2 poison, i64 1, 0
@@ -368,11 +304,7 @@ define void @st_param_v2_i64_ri(i64 %val) {
; CHECK-NEXT: { // callseq 16, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, 2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_i64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_i64, (param0);
; CHECK-NEXT: } // callseq 16
; CHECK-NEXT: ret;
%struct.ri0 = insertvalue %struct.longlong2 poison, i64 %val, 0
@@ -390,11 +322,7 @@ define void @st_param_v2_f32_ii(float %val) {
; CHECK-NEXT: { // callseq 17, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {0f3F800000, 0f40000000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_f32, (param0);
; CHECK-NEXT: } // callseq 17
; CHECK-NEXT: ret;
call void @call_v2_f32(%struct.float2 { float 1.0, float 2.0 })
@@ -410,11 +338,7 @@ define void @st_param_v2_f32_ir(float %val) {
; CHECK-NEXT: { // callseq 18, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {0f3F800000, %r1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_f32, (param0);
; CHECK-NEXT: } // callseq 18
; CHECK-NEXT: ret;
%struct.ir0 = insertvalue %struct.float2 poison, float 1.0, 0
@@ -432,11 +356,7 @@ define void @st_param_v2_f32_ri(float %val) {
; CHECK-NEXT: { // callseq 19, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 0f40000000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_f32, (param0);
; CHECK-NEXT: } // callseq 19
; CHECK-NEXT: ret;
%struct.ri0 = insertvalue %struct.float2 poison, float %val, 0
@@ -454,11 +374,7 @@ define void @st_param_v2_f64_ii(double %val) {
; CHECK-NEXT: { // callseq 20, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {0d3FF0000000000000, 0d4000000000000000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_f64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_f64, (param0);
; CHECK-NEXT: } // callseq 20
; CHECK-NEXT: ret;
call void @call_v2_f64(%struct.double2 { double 1.0, double 2.0 })
@@ -474,11 +390,7 @@ define void @st_param_v2_f64_ir(double %val) {
; CHECK-NEXT: { // callseq 21, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {0d3FF0000000000000, %rd1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_f64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_f64, (param0);
; CHECK-NEXT: } // callseq 21
; CHECK-NEXT: ret;
%struct.ir0 = insertvalue %struct.double2 poison, double 1.0, 0
@@ -496,11 +408,7 @@ define void @st_param_v2_f64_ri(double %val) {
; CHECK-NEXT: { // callseq 22, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd1, 0d4000000000000000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v2_f64,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v2_f64, (param0);
; CHECK-NEXT: } // callseq 22
; CHECK-NEXT: ret;
%struct.ri0 = insertvalue %struct.double2 poison, double %val, 0
@@ -525,11 +433,7 @@ define void @st_param_v4_i8_iiii() {
; CHECK-NEXT: { // callseq 23, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 23
; CHECK-NEXT: ret;
call void @call_v4_i8(%struct.char4 { i8 1, i8 2, i8 3, i8 4 })
@@ -547,11 +451,7 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
; CHECK-NEXT: { // callseq 24, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, %rs3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 24
; CHECK-NEXT: ret;
%struct.irrr0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -573,11 +473,7 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
; CHECK-NEXT: { // callseq 25, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, %rs3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 25
; CHECK-NEXT: ret;
%struct.rirr0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -599,11 +495,7 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
; CHECK-NEXT: { // callseq 26, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, %rs3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 26
; CHECK-NEXT: ret;
%struct.rrir0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -625,11 +517,7 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
; CHECK-NEXT: { // callseq 27, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, %rs3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 27
; CHECK-NEXT: ret;
%struct.rrri0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -650,11 +538,7 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
; CHECK-NEXT: { // callseq 28, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, %rs2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 28
; CHECK-NEXT: ret;
%struct.iirr0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -675,11 +559,7 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
; CHECK-NEXT: { // callseq 29, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, %rs2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 29
; CHECK-NEXT: ret;
%struct.irir0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -700,11 +580,7 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
; CHECK-NEXT: { // callseq 30, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, %rs2, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 30
; CHECK-NEXT: ret;
%struct.irri0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -725,11 +601,7 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
; CHECK-NEXT: { // callseq 31, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, %rs2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 31
; CHECK-NEXT: ret;
%struct.riir0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -750,11 +622,7 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
; CHECK-NEXT: { // callseq 32, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, %rs2, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 32
; CHECK-NEXT: ret;
%struct.riri0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -775,11 +643,7 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
; CHECK-NEXT: { // callseq 33, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, %rs2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 33
; CHECK-NEXT: ret;
%struct.rrii0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -799,11 +663,7 @@ define void @st_param_v4_i8_iiir(i8 %d) {
; CHECK-NEXT: { // callseq 34, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 34
; CHECK-NEXT: ret;
%struct.iiir0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -823,11 +683,7 @@ define void @st_param_v4_i8_iiri(i8 %c) {
; CHECK-NEXT: { // callseq 35, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 35
; CHECK-NEXT: ret;
%struct.iiri0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -847,11 +703,7 @@ define void @st_param_v4_i8_irii(i8 %b) {
; CHECK-NEXT: { // callseq 36, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 36
; CHECK-NEXT: ret;
%struct.irii0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -871,11 +723,7 @@ define void @st_param_v4_i8_riii(i8 %a) {
; CHECK-NEXT: { // callseq 37, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i8,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 37
; CHECK-NEXT: ret;
%struct.riii0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -895,11 +743,7 @@ define void @st_param_v4_i16_iiii() {
; CHECK-NEXT: { // callseq 38, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 38
; CHECK-NEXT: ret;
call void @call_v4_i16(%struct.short4 { i16 1, i16 2, i16 3, i16 4 })
@@ -917,11 +761,7 @@ define void @st_param_v4_i16_irrr(i16 %b, i16 %c, i16 %d) {
; CHECK-NEXT: { // callseq 39, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, %rs2, %rs3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 39
; CHECK-NEXT: ret;
%struct.irrr0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -943,11 +783,7 @@ define void @st_param_v4_i16_rirr(i16 %a, i16 %c, i16 %d) {
; CHECK-NEXT: { // callseq 40, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, %rs2, %rs3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 40
; CHECK-NEXT: ret;
%struct.rirr0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -969,11 +805,7 @@ define void @st_param_v4_i16_rrir(i16 %a, i16 %b, i16 %d) {
; CHECK-NEXT: { // callseq 41, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, %rs3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 41
; CHECK-NEXT: ret;
%struct.rrir0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -995,11 +827,7 @@ define void @st_param_v4_i16_rrri(i16 %a, i16 %b, i16 %c) {
; CHECK-NEXT: { // callseq 42, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, %rs3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 42
; CHECK-NEXT: ret;
%struct.rrri0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1020,11 +848,7 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) {
; CHECK-NEXT: { // callseq 43, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 43
; CHECK-NEXT: ret;
%struct.iirr0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1045,11 +869,7 @@ define void @st_param_v4_i16_irir(i16 %b, i16 %d) {
; CHECK-NEXT: { // callseq 44, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, %rs2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 44
; CHECK-NEXT: ret;
%struct.irir0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1070,11 +890,7 @@ define void @st_param_v4_i16_irri(i16 %b, i16 %c) {
; CHECK-NEXT: { // callseq 45, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, %rs2, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 45
; CHECK-NEXT: ret;
%struct.irri0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1095,11 +911,7 @@ define void @st_param_v4_i16_riir(i16 %a, i16 %d) {
; CHECK-NEXT: { // callseq 46, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, %rs2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 46
; CHECK-NEXT: ret;
%struct.riir0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1120,11 +932,7 @@ define void @st_param_v4_i16_riri(i16 %a, i16 %c) {
; CHECK-NEXT: { // callseq 47, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, %rs2, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 47
; CHECK-NEXT: ret;
%struct.riri0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1145,11 +953,7 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
; CHECK-NEXT: { // callseq 48, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 48
; CHECK-NEXT: ret;
%struct.rrii0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1169,11 +973,7 @@ define void @st_param_v4_i16_iiir(i16 %d) {
; CHECK-NEXT: { // callseq 49, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 49
; CHECK-NEXT: ret;
%struct.iiir0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1193,11 +993,7 @@ define void @st_param_v4_i16_iiri(i16 %c) {
; CHECK-NEXT: { // callseq 50, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 50
; CHECK-NEXT: ret;
%struct.iiri0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1217,11 +1013,7 @@ define void @st_param_v4_i16_irii(i16 %b) {
; CHECK-NEXT: { // callseq 51, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 51
; CHECK-NEXT: ret;
%struct.irii0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1241,11 +1033,7 @@ define void @st_param_v4_i16_riii(i16 %a) {
; CHECK-NEXT: { // callseq 52, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i16,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 52
; CHECK-NEXT: ret;
%struct.riii0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1265,11 +1053,7 @@ define void @st_param_v4_i32_iiii() {
; CHECK-NEXT: { // callseq 53, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 53
; CHECK-NEXT: ret;
call void @call_v4_i32(%struct.int4 { i32 1, i32 2, i32 3, i32 4 })
@@ -1287,11 +1071,7 @@ define void @st_param_v4_i32_irrr(i32 %b, i32 %c, i32 %d) {
; CHECK-NEXT: { // callseq 54, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, %r2, %r3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 54
; CHECK-NEXT: ret;
%struct.irrr0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1313,11 +1093,7 @@ define void @st_param_v4_i32_rirr(i32 %a, i32 %c, i32 %d) {
; CHECK-NEXT: { // callseq 55, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, %r2, %r3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 55
; CHECK-NEXT: ret;
%struct.rirr0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1339,11 +1115,7 @@ define void @st_param_v4_i32_rrir(i32 %a, i32 %b, i32 %d) {
; CHECK-NEXT: { // callseq 56, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 3, %r3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 56
; CHECK-NEXT: ret;
%struct.rrir0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1365,11 +1137,7 @@ define void @st_param_v4_i32_rrri(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: { // callseq 57, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, %r3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 57
; CHECK-NEXT: ret;
%struct.rrri0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1390,11 +1158,7 @@ define void @st_param_v4_i32_iirr(i32 %c, i32 %d) {
; CHECK-NEXT: { // callseq 58, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, %r1, %r2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 58
; CHECK-NEXT: ret;
%struct.iirr0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1415,11 +1179,7 @@ define void @st_param_v4_i32_irir(i32 %b, i32 %d) {
; CHECK-NEXT: { // callseq 59, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, 3, %r2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 59
; CHECK-NEXT: ret;
%struct.irir0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1440,11 +1200,7 @@ define void @st_param_v4_i32_irri(i32 %b, i32 %c) {
; CHECK-NEXT: { // callseq 60, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, %r2, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 60
; CHECK-NEXT: ret;
%struct.irri0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1465,11 +1221,7 @@ define void @st_param_v4_i32_riir(i32 %a, i32 %d) {
; CHECK-NEXT: { // callseq 61, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, 3, %r2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 61
; CHECK-NEXT: ret;
%struct.riir0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1490,11 +1242,7 @@ define void @st_param_v4_i32_riri(i32 %a, i32 %c) {
; CHECK-NEXT: { // callseq 62, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, %r2, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 62
; CHECK-NEXT: ret;
%struct.riri0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1515,11 +1263,7 @@ define void @st_param_v4_i32_rrii(i32 %a, i32 %b) {
; CHECK-NEXT: { // callseq 63, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 63
; CHECK-NEXT: ret;
%struct.rrii0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1539,11 +1283,7 @@ define void @st_param_v4_i32_iiir(i32 %d) {
; CHECK-NEXT: { // callseq 64, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, 3, %r1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 64
; CHECK-NEXT: ret;
%struct.iiir0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1563,11 +1303,7 @@ define void @st_param_v4_i32_iiri(i32 %c) {
; CHECK-NEXT: { // callseq 65, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, 2, %r1, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 65
; CHECK-NEXT: ret;
%struct.iiri0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1587,11 +1323,7 @@ define void @st_param_v4_i32_irii(i32 %b) {
; CHECK-NEXT: { // callseq 66, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {1, %r1, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 66
; CHECK-NEXT: ret;
%struct.irii0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1611,11 +1343,7 @@ define void @st_param_v4_i32_riii(i32 %a) {
; CHECK-NEXT: { // callseq 67, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 2, 3, 4};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_i32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_i32, (param0);
; CHECK-NEXT: } // callseq 67
; CHECK-NEXT: ret;
%struct.riii0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1635,11 +1363,7 @@ define void @st_param_v4_f32_iiii() {
; CHECK-NEXT: { // callseq 68, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, 0f40400000, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 68
; CHECK-NEXT: ret;
call void @call_v4_f32(%struct.float4 { float 1.0, float 2.0, float 3.0, float 4.0 })
@@ -1657,11 +1381,7 @@ define void @st_param_v4_f32_irrr(float %b, float %c, float %d) {
; CHECK-NEXT: { // callseq 69, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, %r2, %r3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 69
; CHECK-NEXT: ret;
%struct.irrr0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1683,11 +1403,7 @@ define void @st_param_v4_f32_rirr(float %a, float %c, float %d) {
; CHECK-NEXT: { // callseq 70, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, %r2, %r3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 70
; CHECK-NEXT: ret;
%struct.rirr0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1709,11 +1425,7 @@ define void @st_param_v4_f32_rrir(float %a, float %b, float %d) {
; CHECK-NEXT: { // callseq 71, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 0f40400000, %r3};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 71
; CHECK-NEXT: ret;
%struct.rrir0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1735,11 +1447,7 @@ define void @st_param_v4_f32_rrri(float %a, float %b, float %c) {
; CHECK-NEXT: { // callseq 72, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, %r3, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 72
; CHECK-NEXT: ret;
%struct.rrri0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1760,11 +1468,7 @@ define void @st_param_v4_f32_iirr(float %c, float %d) {
; CHECK-NEXT: { // callseq 73, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, %r1, %r2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 73
; CHECK-NEXT: ret;
%struct.iirr0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1785,11 +1489,7 @@ define void @st_param_v4_f32_irir(float %b, float %d) {
; CHECK-NEXT: { // callseq 74, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, 0f40400000, %r2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 74
; CHECK-NEXT: ret;
%struct.irir0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1810,11 +1510,7 @@ define void @st_param_v4_f32_irri(float %b, float %c) {
; CHECK-NEXT: { // callseq 75, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, %r2, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 75
; CHECK-NEXT: ret;
%struct.irri0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1835,11 +1531,7 @@ define void @st_param_v4_f32_riir(float %a, float %d) {
; CHECK-NEXT: { // callseq 76, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, 0f40400000, %r2};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 76
; CHECK-NEXT: ret;
%struct.riir0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1860,11 +1552,7 @@ define void @st_param_v4_f32_riri(float %a, float %c) {
; CHECK-NEXT: { // callseq 77, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, %r2, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 77
; CHECK-NEXT: ret;
%struct.riri0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1885,11 +1573,7 @@ define void @st_param_v4_f32_rrii(float %a, float %b) {
; CHECK-NEXT: { // callseq 78, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, %r2, 0f40400000, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 78
; CHECK-NEXT: ret;
%struct.rrii0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1909,11 +1593,7 @@ define void @st_param_v4_f32_iiir(float %d) {
; CHECK-NEXT: { // callseq 79, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, 0f40400000, %r1};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 79
; CHECK-NEXT: ret;
%struct.iiir0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1933,11 +1613,7 @@ define void @st_param_v4_f32_iiri(float %c) {
; CHECK-NEXT: { // callseq 80, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, 0f40000000, %r1, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 80
; CHECK-NEXT: ret;
%struct.iiri0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1957,11 +1633,7 @@ define void @st_param_v4_f32_irii(float %b) {
; CHECK-NEXT: { // callseq 81, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {0f3F800000, %r1, 0f40400000, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 81
; CHECK-NEXT: ret;
%struct.irii0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1981,11 +1653,7 @@ define void @st_param_v4_f32_riii(float %a) {
; CHECK-NEXT: { // callseq 82, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: st.param.v4.b32 [param0], {%r1, 0f40000000, 0f40400000, 0f40800000};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_v4_f32,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_v4_f32, (param0);
; CHECK-NEXT: } // callseq 82
; CHECK-NEXT: ret;
%struct.riii0 = insertvalue %struct.float4 poison, float %a, 0
@@ -2011,11 +1679,7 @@ define void @st_param_bfloat() {
; CHECK-NEXT: { // callseq 83, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
; CHECK-NEXT: st.param.b16 [param0], %rs1;
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: call_bfloat,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni call_bfloat, (param0);
; CHECK-NEXT: } // callseq 83
; CHECK-NEXT: ret;
%five = bitcast i16 16640 to bfloat
diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll
index 52415b05e03d0..5b31b5e24bc68 100644
--- a/llvm/test/CodeGen/NVPTX/store-undef.ll
+++ b/llvm/test/CodeGen/NVPTX/store-undef.ll
@@ -16,11 +16,7 @@ define void @test_store_param_undef() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 param0[32];
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: test_call,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni test_call, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
call void @test_call(%struct.T undef)
@@ -41,11 +37,7 @@ define void @test_store_param_def(i64 %param0, i32 %param1) {
; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1};
; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
-; CHECK-NEXT: call.uni
-; CHECK-NEXT: test_call,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni test_call, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
%V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index a97a8b5822f99..d6961a9541776 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -71,11 +71,7 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) {
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: texfunc,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), texfunc, (param0);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: add.rn.f32 %r8, %r2, %r6;
diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
index efbac868dba38..178ee7ff6db18 100644
--- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
@@ -33,11 +33,7 @@
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_s_i8i16p,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+4];
@@ -80,11 +76,7 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_s_i8i32p,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -147,11 +139,7 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_s_i8i64p,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
@@ -192,11 +180,7 @@ define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]];
; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]];
; CHECK: .param .align 8 .b8 retval0[16];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_s_i8f16p,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2I_0:%rs[0-9]+]], [retval0+3];
; CHECK-DAG: ld.param.b8 [[R2I_1:%rs[0-9]+]], [retval0+4];
@@ -239,11 +223,7 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_s_i8f16x2p,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -286,11 +266,7 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]];
; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]];
; CHECK: .param .align 8 .b8 retval0[24];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_s_i8f32p,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -353,11 +329,7 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]];
; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]];
; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: test_s_i8f64p,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9];
; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10];
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 80cf938d48b53..618c7ed0c4997 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -28,10 +28,7 @@ define ptx_kernel void @kernel_func() {
; NO-TRAP-UNREACHABLE-EMPTY:
; NO-TRAP-UNREACHABLE-NEXT: // %bb.0:
; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0
-; NO-TRAP-UNREACHABLE-NEXT: call.uni
-; NO-TRAP-UNREACHABLE-NEXT: throw,
-; NO-TRAP-UNREACHABLE-NEXT: (
-; NO-TRAP-UNREACHABLE-NEXT: );
+; NO-TRAP-UNREACHABLE-NEXT: call.uni throw, ();
; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0
; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm
; NO-TRAP-UNREACHABLE-NEXT: exit;
@@ -43,10 +40,7 @@ define ptx_kernel void @kernel_func() {
; NO-TRAP-AFTER-NORETURN-EMPTY:
; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0:
; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0
-; NO-TRAP-AFTER-NORETURN-NEXT: call.uni
-; NO-TRAP-AFTER-NORETURN-NEXT: throw,
-; NO-TRAP-AFTER-NORETURN-NEXT: (
-; NO-TRAP-AFTER-NORETURN-NEXT: );
+; NO-TRAP-AFTER-NORETURN-NEXT: call.uni throw, ();
; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0
; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm
; NO-TRAP-AFTER-NORETURN-NEXT: exit;
@@ -59,10 +53,7 @@ define ptx_kernel void @kernel_func() {
; TRAP-EMPTY:
; TRAP-NEXT: // %bb.0:
; TRAP-NEXT: { // callseq 0, 0
-; TRAP-NEXT: call.uni
-; TRAP-NEXT: throw,
-; TRAP-NEXT: (
-; TRAP-NEXT: );
+; TRAP-NEXT: call.uni throw, ();
; TRAP-NEXT: } // callseq 0
; TRAP-NEXT: trap; exit;
;
@@ -72,10 +63,7 @@ define ptx_kernel void @kernel_func() {
; BUG-FIXED-EMPTY:
; BUG-FIXED-NEXT: // %bb.0:
; BUG-FIXED-NEXT: { // callseq 0, 0
-; BUG-FIXED-NEXT: call.uni
-; BUG-FIXED-NEXT: throw,
-; BUG-FIXED-NEXT: (
-; BUG-FIXED-NEXT: );
+; BUG-FIXED-NEXT: call.uni throw, ();
; BUG-FIXED-NEXT: } // callseq 0
; BUG-FIXED-NEXT: trap;
call void @throw()
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index ddaa9fd831af7..ca1b722527a89 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -126,12 +126,7 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: call.uni (retval0),
-; CHECK-PTX-NEXT: variadics1,
-; CHECK-PTX-NEXT: (
-; CHECK-PTX-NEXT: param0,
-; CHECK-PTX-NEXT: param1
-; CHECK-PTX-NEXT: );
+; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT: } // callseq 0
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2;
@@ -238,12 +233,7 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: call.uni (retval0),
-; CHECK-PTX-NEXT: variadics2,
-; CHECK-PTX-NEXT: (
-; CHECK-PTX-NEXT: param0,
-; CHECK-PTX-NEXT: param1
-; CHECK-PTX-NEXT: );
+; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT: } // callseq 1
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2;
@@ -315,12 +305,7 @@ define dso_local i32 @baz() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: call.uni (retval0),
-; CHECK-PTX-NEXT: variadics3,
-; CHECK-PTX-NEXT: (
-; CHECK-PTX-NEXT: param0,
-; CHECK-PTX-NEXT: param1
-; CHECK-PTX-NEXT: );
+; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT: } // callseq 2
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r2;
@@ -397,12 +382,7 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd8;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: call.uni (retval0),
-; CHECK-PTX-NEXT: variadics4,
-; CHECK-PTX-NEXT: (
-; CHECK-PTX-NEXT: param0,
-; CHECK-PTX-NEXT: param1
-; CHECK-PTX-NEXT: );
+; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 3
; CHECK-PTX-NEXT: ret;
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
index b8779b9d54ea7..f466b1de9fb5a 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
@@ -19,11 +19,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd2, %rd1};
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd4, %rd3};
; CHECK-NEXT: .param .align 16 .b8 retval0[32];
-; CHECK-NEXT: call.uni (retval0),
-; CHECK-NEXT: callee_St8x4,
-; CHECK-NEXT: (
-; CHECK-NEXT: param0
-; CHECK-NEXT: );
+; CHECK-NEXT: call.uni (retval0), callee_St8x4, (param0);
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0];
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];
; CHECK-NEXT: } // callseq 0
>From e365cfe71946b4c1bcea4867942487a60fe0aa3c Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 25 Jun 2025 02:45:41 +0000
Subject: [PATCH 3/3] address comments
---
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 12 --------
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 -
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 31 ++++++++++++---------
3 files changed, 18 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 849274f3678ca..61fe8a53cb63a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -163,10 +163,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryStoreParam(N))
return;
break;
- case ISD::INTRINSIC_WO_CHAIN:
- if (tryIntrinsicNoChain(N))
- return;
- break;
case ISD::INTRINSIC_W_CHAIN:
if (tryIntrinsicChain(N))
return;
@@ -902,14 +898,6 @@ NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
return {InstructionOrdering, Scope};
}
-bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
- unsigned IID = N->getConstantOperandVal(0);
- switch (IID) {
- default:
- return false;
- }
-}
-
void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
SDValue Src = N->getOperand(0);
AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ff58e4486a222..92b5c773258ed 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -69,7 +69,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
#include "NVPTXGenDAGISel.inc"
void Select(SDNode *N) override;
- bool tryIntrinsicNoChain(SDNode *N);
bool tryIntrinsicChain(SDNode *N);
bool tryIntrinsicVoid(SDNode *N);
void SelectTexSurfHandle(SDNode *N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index bc506dd8a7114..1ea6d98a1df8e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1741,11 +1741,14 @@ def FMOV64i : MOVi<B64, "b64", f64, f64imm, fpimm>;
def to_tglobaladdr : SDNodeXForm<globaladdr, [{
- return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N), N->getValueType(0), N->getOffset(), N->getTargetFlags());
+ return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N),
+ N->getValueType(0), N->getOffset(),
+ N->getTargetFlags());
}]>;
def to_texternsym : SDNodeXForm<externalsym, [{
- return CurDAG->getTargetExternalSymbol(N->getSymbol(), N->getValueType(0), N->getTargetFlags());
+ return CurDAG->getTargetExternalSymbol(N->getSymbol(), N->getValueType(0),
+ N->getTargetFlags());
}]>;
def to_tframeindex : SDNodeXForm<frameindex, [{
@@ -2139,30 +2142,32 @@ foreach is_convergent = [0, 1] in {
let isCall = 1, isConvergent = is_convergent in {
def CALL # convergent_suffix :
- NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, i32imm:$proto),
+ NVPTXInst<(outs),
+ (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params,
+ i32imm:$proto),
"call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
def CALL_UNI # convergent_suffix :
- NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
+ NVPTXInst<(outs),
+ (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
"call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
}
- defvar inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
- defvar inst_uni = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
-
+ defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto),
- (inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
+ (call_inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto),
- (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+ (call_inst $addr, imm:$rets, imm:$params, imm:$proto)>;
def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto),
- (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+ (call_inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+ defvar call_uni_inst = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0),
- (inst_uni (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
+ (call_uni_inst (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0),
- (inst_uni $addr, imm:$rets, imm:$params)>;
+ (call_uni_inst $addr, imm:$rets, imm:$params)>;
def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 0),
- (inst_uni $addr, imm:$rets, imm:$params)>;
+ (call_uni_inst $addr, imm:$rets, imm:$params)>;
}
def LoadParamMemI64 : LoadParamMemInst<B64, ".b64">;
More information about the cfe-commits
mailing list