[clang] [llvm] [NVPTX] Consolidate and cleanup various NVPTXISD nodes (NFC) (PR #145581)

Alex MacLean via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 24 19:45:54 PDT 2025


https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/145581

>From c8cc587ee44ba499cda0fc6712ca33ba22dc7c77 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Mon, 23 Jun 2025 18:23:24 +0000
Subject: [PATCH 1/3] [NVPTX] Remove various dead/extraneous NVPTXISD nodes
 (NFC)

---
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp |  59 ++--------
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp |  32 +-----
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h   |  13 ---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td     | 113 ++++++--------------
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td    |   2 +
 5 files changed, 44 insertions(+), 175 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ff10eea371049..af9050c55d33a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -160,8 +160,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
   case NVPTXISD::StoreParam:
   case NVPTXISD::StoreParamV2:
   case NVPTXISD::StoreParamV4:
-  case NVPTXISD::StoreParamS32:
-  case NVPTXISD::StoreParamU32:
     if (tryStoreParam(N))
       return;
     break;
@@ -909,19 +907,9 @@ bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
   switch (IID) {
   default:
     return false;
-  case Intrinsic::nvvm_texsurf_handle_internal:
-    SelectTexSurfHandle(N);
-    return true;
   }
 }
 
-void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
-  // Op 0 is the intrinsic ID
-  SDValue Wrapper = N->getOperand(1);
-  SDValue GlobalVal = Wrapper.getOperand(0);
-  ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
-                                        MVT::i64, GlobalVal));
-}
 
 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
   SDValue Src = N->getOperand(0);
@@ -1717,8 +1705,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Unexpected opcode");
-  case NVPTXISD::StoreParamU32:
-  case NVPTXISD::StoreParamS32:
   case NVPTXISD::StoreParam:
     NumElts = 1;
     break;
@@ -1796,27 +1782,6 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
     }
     }
     break;
-  // Special case: if we have a sign-extend/zero-extend node, insert the
-  // conversion instruction first, and use that as the value operand to
-  // the selected StoreParam node.
-  case NVPTXISD::StoreParamU32: {
-    Opcode = NVPTX::StoreParamI32_r;
-    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
-                                                MVT::i32);
-    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
-                                         MVT::i32, Ops[0], CvtNone);
-    Ops[0] = SDValue(Cvt, 0);
-    break;
-  }
-  case NVPTXISD::StoreParamS32: {
-    Opcode = NVPTX::StoreParamI32_r;
-    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
-                                                MVT::i32);
-    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
-                                         MVT::i32, Ops[0], CvtNone);
-    Ops[0] = SDValue(Cvt, 0);
-    break;
-  }
   }
 
   SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
@@ -2105,22 +2070,14 @@ static inline bool isAddLike(const SDValue V) {
 // selectBaseADDR - Match a dag node which will serve as the base address for an
 // ADDR operand pair.
 static SDValue selectBaseADDR(SDValue N, SelectionDAG *DAG) {
-  // Return true if TGA or ES.
-  if (N.getOpcode() == ISD::TargetGlobalAddress ||
-      N.getOpcode() == ISD::TargetExternalSymbol)
-    return N;
-
-  if (N.getOpcode() == NVPTXISD::Wrapper)
-    return N.getOperand(0);
-
-  // addrspacecast(Wrapper(arg_symbol) to addrspace(PARAM)) -> arg_symbol
-  if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N))
-    if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
-        CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
-        CastN->getOperand(0).getOpcode() == NVPTXISD::Wrapper)
-      return selectBaseADDR(CastN->getOperand(0).getOperand(0), DAG);
-
-  if (auto *FIN = dyn_cast<FrameIndexSDNode>(N))
+  if (const auto *GA = dyn_cast<GlobalAddressSDNode>(N))
+    return DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N),
+                                       GA->getValueType(0), GA->getOffset(),
+                                       GA->getTargetFlags());
+  if (const auto *ES = dyn_cast<ExternalSymbolSDNode>(N))
+    return DAG->getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+                                        ES->getTargetFlags());
+  if (const auto *FIN = dyn_cast<FrameIndexSDNode>(N))
     return DAG->getTargetFrameIndex(FIN->getIndex(), FIN->getValueType(0));
 
   return N;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d2fafe854e9e4..12d1cc17250ae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -702,9 +702,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 
-  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
-
   // We want to legalize constant related memmove and memcopy
   // intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -1055,14 +1052,10 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case NVPTXISD::FIRST_NUMBER:
     break;
 
-    MAKE_CASE(NVPTXISD::CALL)
     MAKE_CASE(NVPTXISD::RET_GLUE)
-    MAKE_CASE(NVPTXISD::LOAD_PARAM)
-    MAKE_CASE(NVPTXISD::Wrapper)
     MAKE_CASE(NVPTXISD::DeclareParam)
     MAKE_CASE(NVPTXISD::DeclareScalarParam)
     MAKE_CASE(NVPTXISD::DeclareRet)
-    MAKE_CASE(NVPTXISD::DeclareScalarRet)
     MAKE_CASE(NVPTXISD::DeclareRetParam)
     MAKE_CASE(NVPTXISD::PrintCall)
     MAKE_CASE(NVPTXISD::PrintConvergentCall)
@@ -1074,24 +1067,18 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(NVPTXISD::StoreParam)
     MAKE_CASE(NVPTXISD::StoreParamV2)
     MAKE_CASE(NVPTXISD::StoreParamV4)
-    MAKE_CASE(NVPTXISD::StoreParamS32)
-    MAKE_CASE(NVPTXISD::StoreParamU32)
     MAKE_CASE(NVPTXISD::CallArgBegin)
     MAKE_CASE(NVPTXISD::CallArg)
     MAKE_CASE(NVPTXISD::LastCallArg)
     MAKE_CASE(NVPTXISD::CallArgEnd)
     MAKE_CASE(NVPTXISD::CallVoid)
-    MAKE_CASE(NVPTXISD::CallVal)
-    MAKE_CASE(NVPTXISD::CallSymbol)
     MAKE_CASE(NVPTXISD::Prototype)
     MAKE_CASE(NVPTXISD::MoveParam)
     MAKE_CASE(NVPTXISD::StoreRetval)
     MAKE_CASE(NVPTXISD::StoreRetvalV2)
     MAKE_CASE(NVPTXISD::StoreRetvalV4)
-    MAKE_CASE(NVPTXISD::PseudoUseParam)
     MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
     MAKE_CASE(NVPTXISD::BUILD_VECTOR)
-    MAKE_CASE(NVPTXISD::RETURN)
     MAKE_CASE(NVPTXISD::CallSeqBegin)
     MAKE_CASE(NVPTXISD::CallSeqEnd)
     MAKE_CASE(NVPTXISD::CallPrototype)
@@ -1115,7 +1102,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(NVPTXISD::STACKSAVE)
     MAKE_CASE(NVPTXISD::SETP_F16X2)
     MAKE_CASE(NVPTXISD::SETP_BF16X2)
-    MAKE_CASE(NVPTXISD::Dummy)
     MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
     MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
     MAKE_CASE(NVPTXISD::BrxEnd)
@@ -1189,15 +1175,6 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
   }
 }
 
-SDValue
-NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
-  SDLoc dl(Op);
-  const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
-  auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
-  Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
-  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
-}
-
 std::string NVPTXTargetLowering::getPrototype(
     const DataLayout &DL, Type *retTy, const ArgListTy &Args,
     const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign RetAlign,
@@ -2919,8 +2896,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return SDValue();
   case ISD::ADDRSPACECAST:
     return LowerADDRSPACECAST(Op, DAG);
-  case ISD::GlobalAddress:
-    return LowerGlobalAddress(Op, DAG);
   case ISD::INTRINSIC_W_CHAIN:
     return Op;
   case ISD::INTRINSIC_WO_CHAIN:
@@ -3129,8 +3104,7 @@ SDValue NVPTXTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout());
 
   // Store the address of unsized array <function>_vararg[] in the ap object.
-  SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
-  SDValue VAReg = DAG.getNode(NVPTXISD::Wrapper, DL, PtrVT, Arg);
+  SDValue VAReg = getParamSymbol(DAG, /* vararg */ -1, PtrVT);
 
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   return DAG.getStore(Op.getOperand(0), DL, VAReg, Op.getOperand(1),
@@ -3370,7 +3344,7 @@ SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
                                             EVT v) const {
   StringRef SavedStr = nvTM->getStrPool().save(
       getParamName(&DAG.getMachineFunction().getFunction(), idx));
-  return DAG.getTargetExternalSymbol(SavedStr.data(), v);
+  return DAG.getExternalSymbol(SavedStr.data(), v);
 }
 
 SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -3438,7 +3412,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
 
       SDValue P;
       if (isKernelFunction(*F)) {
-        P = DAG.getNode(NVPTXISD::Wrapper, dl, ByvalIn.VT, ArgSymbol);
+        P = ArgSymbol;
         P.getNode()->setIROrder(Arg.getArgNo() + 1);
       } else {
         P = DAG.getNode(NVPTXISD::MoveParam, dl, ByvalIn.VT, ArgSymbol);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 0a54a8fd71f32..984e95c1b2e79 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -24,15 +24,11 @@ namespace NVPTXISD {
 enum NodeType : unsigned {
   // Start the numbering from where ISD NodeType finishes.
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
-  Wrapper,
-  CALL,
   RET_GLUE,
-  LOAD_PARAM,
   DeclareParam,
   DeclareScalarParam,
   DeclareRetParam,
   DeclareRet,
-  DeclareScalarRet,
   PrintCall,
   PrintConvergentCall,
   PrintCallUni,
@@ -42,12 +38,8 @@ enum NodeType : unsigned {
   LastCallArg,
   CallArgEnd,
   CallVoid,
-  CallVal,
-  CallSymbol,
   Prototype,
   MoveParam,
-  PseudoUseParam,
-  RETURN,
   CallSeqBegin,
   CallSeqEnd,
   CallPrototype,
@@ -83,7 +75,6 @@ enum NodeType : unsigned {
   CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
   CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
   CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
-  Dummy,
 
   FIRST_MEMORY_OPCODE,
   LoadV2 = FIRST_MEMORY_OPCODE,
@@ -100,8 +91,6 @@ enum NodeType : unsigned {
   StoreParam,
   StoreParamV2,
   StoreParamV4,
-  StoreParamS32, // to sext and store a <32bit value, not used currently
-  StoreParamU32, // to zext and store a <32bit value, not used currently
   StoreRetval,
   StoreRetvalV2,
   StoreRetvalV4,
@@ -120,8 +109,6 @@ class NVPTXTargetLowering : public TargetLowering {
                                const NVPTXSubtarget &STI);
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
-  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
-
   const char *getTargetNodeName(unsigned Opcode) const override;
 
   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 5979054764647..ea8a3f955645f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1700,17 +1700,6 @@ def Offseti32imm : Operand<i32> {
   let PrintMethod = "printOffseti32imm";
 }
 
-def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
-def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
-
-// Load a memory address into a u32 or u64 register.
-def MOV_ADDR : BasicNVPTXInst<(outs B32:$dst), (ins ADDR_base:$a),
-                         "mov.b32",
-                         [(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
-def MOV_ADDR64 : BasicNVPTXInst<(outs B64:$dst), (ins ADDR_base:$a),
-                           "mov.b64",
-                           [(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
-
 // Get pointer to local stack.
 let hasSideEffects = false in {
   def MOV_DEPOT_ADDR :    NVPTXInst<(outs B32:$d), (ins i32imm:$num),
@@ -1750,8 +1739,24 @@ def BFMOV16i : MOVi<B16, "b16", bf16, bf16imm, fpimm>;
 def FMOV32i : MOVi<B32, "b32", f32, f32imm, fpimm>;
 def FMOV64i : MOVi<B64, "b64", f64, f64imm, fpimm>;
 
-def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>;
-def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;
+
+def to_tglobaladdr : SDNodeXForm<globaladdr, [{
+  return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N), N->getValueType(0), N->getOffset(), N->getTargetFlags());
+}]>;
+
+def to_texternsym : SDNodeXForm<externalsym, [{
+  return CurDAG->getTargetExternalSymbol(N->getSymbol(), N->getValueType(0), N->getTargetFlags());
+}]>;
+
+def to_tframeindex : SDNodeXForm<frameindex, [{
+  return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
+}]>;
+
+def : Pat<(i32 globaladdr:$dst), (IMOV32i (to_tglobaladdr $dst))>;
+def : Pat<(i64 globaladdr:$dst), (IMOV64i (to_tglobaladdr $dst))>;
+
+def : Pat<(i32 externalsym:$dst), (IMOV32i (to_texternsym $dst))>;
+def : Pat<(i64 externalsym:$dst), (IMOV64i (to_texternsym $dst))>;
 
 //---- Copy Frame Index ----
 def LEA_ADDRi :   NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
@@ -1759,10 +1764,6 @@ def LEA_ADDRi :   NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
 def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
                             "add.u64 \t$dst, ${addr:add};", []>;
 
-def to_tframeindex : SDNodeXForm<frameindex, [{
-  return CurDAG->getTargetFrameIndex(N->getIndex(), N->getValueType(0));
-}]>;
-
 def : Pat<(i32 frameindex:$fi), (LEA_ADDRi (to_tframeindex $fi), 0)>;
 def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
 
@@ -1990,11 +1991,10 @@ def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]
 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
 def SDTCallValProfile : SDTypeProfile<1, 0, []>;
-def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
+def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
 def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
 def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
-def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
 def SDTProxyRegProfile : SDTypeProfile<1, 1, []>;
 
 def DeclareParam :
@@ -2039,12 +2039,6 @@ def StoreParamV2 :
 def StoreParamV4 :
   SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamU32 :
-  SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def StoreParamS32 :
-  SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def CallArgBegin :
   SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2063,9 +2057,6 @@ def CallVoid :
 def Prototype :
   SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallVal :
-  SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def MoveParam :
   SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
 def StoreRetval :
@@ -2077,12 +2068,6 @@ def StoreRetvalV2 :
 def StoreRetvalV4 :
   SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
          [SDNPHasChain, SDNPSideEffect]>;
-def PseudoUseParam :
-  SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def RETURNNode :
-  SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
-         [SDNPHasChain, SDNPSideEffect]>;
 def ProxyReg :
   SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2247,19 +2232,18 @@ def StoreRetvalV4I8   : StoreRetvalV4Inst<B16, ".b8">;
 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
 def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
 def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
-def RETURNInst       : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
 
 def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
                              [(CallArg 1, imm:$a)]>;
 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
                                  [(LastCallArg 1, imm:$a)]>;
 
-def CallVoidInst :      NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ",
-                                  [(CallVoid (Wrapper tglobaladdr:$addr))]>;
-def CallVoidInstReg :   NVPTXInst<(outs), (ins B32:$addr), "$addr, ",
-                                  [(CallVoid i32:$addr)]>;
-def CallVoidInstReg64 : NVPTXInst<(outs), (ins B64:$addr), "$addr, ",
-                                  [(CallVoid i64:$addr)]>;
+def CALL_ADDR :      NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", []>;
+
+def : Pat<(CallVoid globaladdr:$addr), (CALL_ADDR (to_tglobaladdr $addr))>;
+def : Pat<(CallVoid i32:$addr), (CALL_ADDR $addr)>;
+def : Pat<(CallVoid i64:$addr), (CALL_ADDR $addr)>;
+
 def PrototypeInst :     NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
                                   [(Prototype (i32 imm:$val))]>;
 
@@ -2271,10 +2255,6 @@ def DeclareRetScalarInst :
   NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
             ".param .b$size retval$num;",
             [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
-def DeclareRetRegInst :
-  NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
-            ".reg .b$size retval$num;",
-            [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
 
 def DeclareParamInst :
   NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
@@ -2284,29 +2264,13 @@ def DeclareScalarParamInst :
   NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
             ".param .b$size param$a;",
             [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
-def DeclareScalarRegInst :
-  NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
-            ".reg .b$size param$a;",
-            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
-
-class MoveParamSymbolInst<RegTyInfo t> :
-  BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src),
-            "mov.b" # t.Size,
-            [(set t.Ty:$dst, (MoveParam texternalsym:$src))]>;
-
-def MOV64_PARAM : MoveParamSymbolInst<I64RT>;
-def MOV32_PARAM : MoveParamSymbolInst<I32RT>;
 
-class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt> :
-  NVPTXInst<(outs), (ins regclass:$src),
-            "// Pseudo use of $src",
-            [(PseudoUseParam vt:$src)]>;
-
-def PseudoUseParamI64 : PseudoUseParamInst<B64, i64>;
-def PseudoUseParamI32 : PseudoUseParamInst<B32, i32>;
-def PseudoUseParamI16 : PseudoUseParamInst<B16, i16>;
-def PseudoUseParamF64 : PseudoUseParamInst<B64, f64>;
-def PseudoUseParamF32 : PseudoUseParamInst<B32, f32>;
+foreach t = [I32RT, I64RT] in {
+  defvar inst_name = "MOV" # t.Size # "_PARAM";
+  def inst_name : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), "mov.b" # t.Size>;
+  def : Pat<(MoveParam (t.Ty externalsym:$src)),
+            (!cast<NVPTXInst>(inst_name) (t.Ty (to_texternsym $src)))>;
+}
 
 multiclass ProxyRegInst<string SzStr, NVPTXRegClass rc> {
   def NAME : BasicNVPTXInst<(outs rc:$dst), (ins rc:$src),
@@ -2861,21 +2825,6 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                             SDNPSideEffect]>;
 
-def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
-                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def calltarget : Operand<i32>;
-let isCall=1 in {
-   def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;
-}
-
-def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
-def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
-
-// Pseudo instructions.
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
-   : NVPTXInst<outs, ins, asmstr, pattern>;
-
 def Callseq_Start :
   NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
             "\\{ // callseq $amt1, $amt2",
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 10d7f04d8d937..cc1fd027d8515 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2224,6 +2224,8 @@ def nvvm_move_sym64 : NVPTXInst<(outs B64:$r), (ins ADDR_base:$s),
 
 def texsurf_handles
   : BasicNVPTXInst<(outs B64:$result), (ins ADDR_base:$src), "mov.u64">;
+def : Pat<(int_nvvm_texsurf_handle_internal globaladdr:$src),
+          (texsurf_handles (to_tglobaladdr $src))>;
 
 //-----------------------------------
 // Compiler Error Warn

>From aad4cadf4610d9d2fdc9b341e65a54c0e5677415 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 24 Jun 2025 16:59:08 +0000
Subject: [PATCH 2/3] more cleanup

---
 clang/test/CodeGenCUDA/bf16.cu                |   6 +-
 .../NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp   |  22 +
 .../NVPTX/MCTargetDesc/NVPTXInstPrinter.h     |   2 +
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp   |   1 -
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |  89 +---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h     |  19 +-
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp      |  20 +-
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.h        |   3 -
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td       | 139 ++---
 llvm/test/CodeGen/NVPTX/alias.ll              |   3 +-
 .../test/CodeGen/NVPTX/bf16x2-instructions.ll |   7 +-
 llvm/test/CodeGen/NVPTX/byval-const-global.ll |   6 +-
 .../CodeGen/NVPTX/call-with-alloca-buffer.ll  |   3 +-
 llvm/test/CodeGen/NVPTX/combine-mad.ll        |   7 +-
 .../test/CodeGen/NVPTX/convergent-mir-call.ll |  10 +-
 .../CodeGen/NVPTX/convert-call-to-indirect.ll |  43 +-
 llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll |  12 +-
 llvm/test/CodeGen/NVPTX/f16-instructions.ll   |  24 +-
 llvm/test/CodeGen/NVPTX/f16x2-instructions.ll |  21 +-
 llvm/test/CodeGen/NVPTX/fma.ll                |  14 +-
 llvm/test/CodeGen/NVPTX/forward-ld-param.ll   |  12 +-
 llvm/test/CodeGen/NVPTX/fp128-storage-type.ll |   6 +-
 llvm/test/CodeGen/NVPTX/i16x2-instructions.ll |  21 +-
 llvm/test/CodeGen/NVPTX/i8x4-instructions.ll  |  21 +-
 llvm/test/CodeGen/NVPTX/indirect_byval.ll     |  16 +-
 llvm/test/CodeGen/NVPTX/ldparam-v4.ll         |   5 +-
 llvm/test/CodeGen/NVPTX/local-stack-frame.ll  |  36 +-
 .../CodeGen/NVPTX/lower-args-gridconstant.ll  |  36 +-
 llvm/test/CodeGen/NVPTX/lower-args.ll         |  14 +-
 llvm/test/CodeGen/NVPTX/lower-byval-args.ll   |  24 +-
 llvm/test/CodeGen/NVPTX/misched_func_call.ll  |   6 +-
 .../NVPTX/naked-fn-with-frame-pointer.ll      |  20 +-
 llvm/test/CodeGen/NVPTX/param-add.ll          |   6 +-
 llvm/test/CodeGen/NVPTX/param-load-store.ll   | 168 ++----
 llvm/test/CodeGen/NVPTX/param-overalign.ll    |  89 ++--
 .../CodeGen/NVPTX/param-vectorize-device.ll   |  72 +--
 llvm/test/CodeGen/NVPTX/shift-opt.ll          |  12 +-
 llvm/test/CodeGen/NVPTX/st-param-imm.ll       | 504 +++---------------
 llvm/test/CodeGen/NVPTX/store-undef.ll        |  12 +-
 llvm/test/CodeGen/NVPTX/tex-read-cuda.ll      |   6 +-
 .../NVPTX/unaligned-param-load-store.ll       |  42 +-
 llvm/test/CodeGen/NVPTX/unreachable.ll        |  20 +-
 llvm/test/CodeGen/NVPTX/variadics-backend.ll  |  28 +-
 .../Inputs/nvptx-basic.ll.expected            |   6 +-
 44 files changed, 387 insertions(+), 1246 deletions(-)

diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu
index df56ec60c63ae..12474381e718b 100644
--- a/clang/test/CodeGenCUDA/bf16.cu
+++ b/clang/test/CodeGenCUDA/bf16.cu
@@ -37,11 +37,7 @@ __device__ __bf16 test_call( __bf16 in) {
 // CHECK:        ld.param.b16    %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0];
 // CHECK:        st.param.b16    [param0], %[[R]];
 // CHECK:        .param .align 2 .b8 retval0[2];
-// CHECK:        call.uni (retval0),
-// CHECK-NEXT:   _Z13external_funcDF16b,
-// CHECK-NEXT:   (
-// CHECK-NEXT:   param0
-// CHECK-NEXT    );
+// CHECK:        call.uni (retval0), _Z13external_funcDF16b, (param0);
 // CHECK:        ld.param.b16    %[[RET:rs[0-9]+]], [retval0];
   return external_func(in);
 // CHECK:        st.param.b16    [func_retval0], %[[RET]]
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index cc79257fb9c86..28f6968ee6caf 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -457,3 +457,25 @@ void NVPTXInstPrinter::printCTAGroup(const MCInst *MI, int OpNum,
   }
   llvm_unreachable("Invalid cta_group in printCTAGroup");
 }
+
+void NVPTXInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
+                                        raw_ostream &O, StringRef Modifier) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Invalid operand");
+  const auto Imm = MO.getImm();
+
+  if (Modifier == "RetList") {
+    assert((Imm == 1 || Imm == 0) && "Invalid return list");
+    if (Imm)
+      O << " (retval0),";
+    return;
+  }
+
+  if (Modifier == "ParamList") {
+    assert(Imm >= 0 && "Invalid parameter list");
+    interleaveComma(llvm::seq(Imm), O,
+                    [&](const auto &I) { O << "param" << I; });
+    return;
+  }
+  llvm_unreachable("Invalid modifier");
+}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index f73af7a3f2c6e..6189284e8a58c 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -52,6 +52,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
   void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O);
   void printTmaReductionMode(const MCInst *MI, int OpNum, raw_ostream &O);
   void printCTAGroup(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O,
+                        StringRef Modifier = {});
 };
 
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index af9050c55d33a..849274f3678ca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -910,7 +910,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
   }
 }
 
-
 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
   SDValue Src = N->getOperand(0);
   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 12d1cc17250ae..b924a1f5ac93c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1057,30 +1057,19 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(NVPTXISD::DeclareScalarParam)
     MAKE_CASE(NVPTXISD::DeclareRet)
     MAKE_CASE(NVPTXISD::DeclareRetParam)
-    MAKE_CASE(NVPTXISD::PrintCall)
-    MAKE_CASE(NVPTXISD::PrintConvergentCall)
-    MAKE_CASE(NVPTXISD::PrintCallUni)
-    MAKE_CASE(NVPTXISD::PrintConvergentCallUni)
+    MAKE_CASE(NVPTXISD::CALL)
     MAKE_CASE(NVPTXISD::LoadParam)
     MAKE_CASE(NVPTXISD::LoadParamV2)
     MAKE_CASE(NVPTXISD::LoadParamV4)
     MAKE_CASE(NVPTXISD::StoreParam)
     MAKE_CASE(NVPTXISD::StoreParamV2)
     MAKE_CASE(NVPTXISD::StoreParamV4)
-    MAKE_CASE(NVPTXISD::CallArgBegin)
-    MAKE_CASE(NVPTXISD::CallArg)
-    MAKE_CASE(NVPTXISD::LastCallArg)
-    MAKE_CASE(NVPTXISD::CallArgEnd)
-    MAKE_CASE(NVPTXISD::CallVoid)
-    MAKE_CASE(NVPTXISD::Prototype)
     MAKE_CASE(NVPTXISD::MoveParam)
     MAKE_CASE(NVPTXISD::StoreRetval)
     MAKE_CASE(NVPTXISD::StoreRetvalV2)
     MAKE_CASE(NVPTXISD::StoreRetvalV4)
     MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
     MAKE_CASE(NVPTXISD::BUILD_VECTOR)
-    MAKE_CASE(NVPTXISD::CallSeqBegin)
-    MAKE_CASE(NVPTXISD::CallSeqEnd)
     MAKE_CASE(NVPTXISD::CallPrototype)
     MAKE_CASE(NVPTXISD::ProxyReg)
     MAKE_CASE(NVPTXISD::LoadV2)
@@ -1578,9 +1567,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
               ? promoteScalarArgumentSize(TypeSize * 8)
               : TypeSize * 8;
 
-      Chain = DAG.getNode(
-          NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
-          {Chain, GetI32(ArgI), GetI32(PromotedSize), GetI32(0), InGlue});
+      Chain =
+          DAG.getNode(NVPTXISD::DeclareScalarParam, dl, {MVT::Other, MVT::Glue},
+                      {Chain, GetI32(ArgI), GetI32(PromotedSize), InGlue});
     }
     InGlue = Chain.getValue(1);
 
@@ -1717,16 +1706,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     const unsigned ResultSize = DL.getTypeAllocSizeInBits(RetTy);
     if (!shouldPassAsArray(RetTy)) {
       const unsigned PromotedResultSize = promoteScalarArgumentSize(ResultSize);
-      SDValue DeclareRetOps[] = {Chain, GetI32(1), GetI32(PromotedResultSize),
-                                 GetI32(0), InGlue};
       Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, {MVT::Other, MVT::Glue},
-                          DeclareRetOps);
+                          {Chain, GetI32(PromotedResultSize), InGlue});
       InGlue = Chain.getValue(1);
     } else {
-      SDValue DeclareRetOps[] = {Chain, GetI32(RetAlign->value()),
-                                 GetI32(ResultSize / 8), GetI32(0), InGlue};
-      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl,
-                          {MVT::Other, MVT::Glue}, DeclareRetOps);
+      Chain = DAG.getNode(
+          NVPTXISD::DeclareRetParam, dl, {MVT::Other, MVT::Glue},
+          {Chain, GetI32(RetAlign->value()), GetI32(ResultSize / 8), InGlue});
       InGlue = Chain.getValue(1);
     }
   }
@@ -1777,25 +1763,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                      HasVAArgs ? std::optional(FirstVAArg) : std::nullopt, *CB,
                      UniqueCallSite);
     const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
-    SDValue ProtoOps[] = {
-        Chain,
-        DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
-        InGlue,
-    };
-    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
-                        ProtoOps);
+    Chain = DAG.getNode(
+        NVPTXISD::CallPrototype, dl, {MVT::Other, MVT::Glue},
+        {Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InGlue});
     InGlue = Chain.getValue(1);
   }
-  // Op to just print "call"
-  SDValue PrintCallOps[] = {Chain, GetI32(Ins.empty() ? 0 : 1), InGlue};
-  // We model convergent calls as separate opcodes.
-  unsigned Opcode =
-      IsIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
-  if (CLI.IsConvergent)
-    Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
-                                              : NVPTXISD::PrintConvergentCall;
-  Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, PrintCallOps);
-  InGlue = Chain.getValue(1);
 
   if (ConvertToIndirectCall) {
     // Copy the function ptr to a ptx register and use the register to call the
@@ -1809,38 +1781,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT);
   }
 
-  // Ops to print out the function name
-  SDValue CallVoidOps[] = { Chain, Callee, InGlue };
-  Chain =
-      DAG.getNode(NVPTXISD::CallVoid, dl, {MVT::Other, MVT::Glue}, CallVoidOps);
-  InGlue = Chain.getValue(1);
-
-  // Ops to print out the param list
-  SDValue CallArgBeginOps[] = { Chain, InGlue };
-  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, {MVT::Other, MVT::Glue},
-                      CallArgBeginOps);
+  const unsigned Proto = IsIndirectCall ? UniqueCallSite : 0;
+  const unsigned NumArgs =
+      std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
+  /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+  ///      NumParams, Callee, Proto, InGlue)
+  Chain = DAG.getNode(NVPTXISD::CALL, dl, {MVT::Other, MVT::Glue},
+                      {Chain, GetI32(CLI.IsConvergent), GetI32(IsIndirectCall),
+                       GetI32(Ins.empty() ? 0 : 1), GetI32(NumArgs), Callee,
+                       GetI32(Proto), InGlue});
   InGlue = Chain.getValue(1);
 
-  const unsigned E = std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
-  for (const unsigned I : llvm::seq(E)) {
-    const unsigned Opcode =
-        I == (E - 1) ? NVPTXISD::LastCallArg : NVPTXISD::CallArg;
-    SDValue CallArgOps[] = {Chain, GetI32(1), GetI32(I), InGlue};
-    Chain = DAG.getNode(Opcode, dl, {MVT::Other, MVT::Glue}, CallArgOps);
-    InGlue = Chain.getValue(1);
-  }
-  SDValue CallArgEndOps[] = {Chain, GetI32(IsIndirectCall ? 0 : 1), InGlue};
-  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, {MVT::Other, MVT::Glue},
-                      CallArgEndOps);
-  InGlue = Chain.getValue(1);
-
-  if (IsIndirectCall) {
-    SDValue PrototypeOps[] = {Chain, GetI32(UniqueCallSite), InGlue};
-    Chain = DAG.getNode(NVPTXISD::Prototype, dl, {MVT::Other, MVT::Glue},
-                        PrototypeOps);
-    InGlue = Chain.getValue(1);
-  }
-
   SmallVector<SDValue, 16> ProxyRegOps;
   // An item of the vector is filled if the element does not need a ProxyReg
   // operation on it and should be added to InVals as is. ProxyRegOps and
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 984e95c1b2e79..5efdd1582214a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -29,19 +29,14 @@ enum NodeType : unsigned {
   DeclareScalarParam,
   DeclareRetParam,
   DeclareRet,
-  PrintCall,
-  PrintConvergentCall,
-  PrintCallUni,
-  PrintConvergentCallUni,
-  CallArgBegin,
-  CallArg,
-  LastCallArg,
-  CallArgEnd,
-  CallVoid,
-  Prototype,
+
+  /// This node represents a PTX call instruction. It's operands are as follows:
+  ///
+  /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+  ///      NumParams, Callee, Proto, InGlue)
+  CALL,
+
   MoveParam,
-  CallSeqBegin,
-  CallSeqEnd,
   CallPrototype,
   ProxyReg,
   FSHL_CLAMP,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index bf84d1dca4ed5..e218ef17bb09b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -190,22 +190,4 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
   BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
   BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
   return 2;
-}
-
-bool NVPTXInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
-                                          const MachineBasicBlock *MBB,
-                                          const MachineFunction &MF) const {
-  // Prevent the scheduler from reordering & splitting up MachineInstrs
-  // which must stick together (in initially set order) to
-  // comprise a valid PTX function call sequence.
-  switch (MI.getOpcode()) {
-  case NVPTX::CallUniPrintCallRetInst1:
-  case NVPTX::CallArgBeginInst:
-  case NVPTX::CallArgParam:
-  case NVPTX::LastCallArgParam:
-  case NVPTX::CallArgEndInst1:
-    return true;
-  }
-
-  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
-}
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 95464dbbd176d..4e9dc9d3b4686 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -66,9 +66,6 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
-  bool isSchedulingBoundary(const MachineInstr &MI,
-                            const MachineBasicBlock *MBB,
-                            const MachineFunction &MF) const override;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index ea8a3f955645f..bc506dd8a7114 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1976,21 +1976,15 @@ defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
 defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
 
 def SDTDeclareParamProfile :
-  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
+  SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
 def SDTDeclareScalarParamProfile :
-  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
+  SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
 def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
 def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
-def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
 def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
 def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
-def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
-def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
-def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
-def SDTCallValProfile : SDTypeProfile<1, 0, []>;
 def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
 def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
@@ -2004,10 +1998,12 @@ def DeclareScalarParam :
   SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def DeclareRetParam :
-  SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
+  SDNode<"NVPTXISD::DeclareRetParam",
+         SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def DeclareRet :
-  SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
+  SDNode<"NVPTXISD::DeclareRet",
+         SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def LoadParam :
   SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
@@ -2018,18 +2014,6 @@ def LoadParamV2 :
 def LoadParamV4 :
   SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
          [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
-def PrintCall :
-  SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def PrintConvergentCall :
-  SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def PrintCallUni :
-  SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def PrintConvergentCallUni :
-  SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def StoreParam :
   SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
@@ -2039,24 +2023,6 @@ def StoreParamV2 :
 def StoreParamV4 :
   SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArgBegin :
-  SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArg :
-  SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def LastCallArg :
-  SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallArgEnd :
-  SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def CallVoid :
-  SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
-def Prototype :
-  SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def MoveParam :
   SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
 def StoreRetval :
@@ -2072,6 +2038,15 @@ def ProxyReg :
   SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 
+  /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+  ///      NumParams, Callee, Proto, InGlue)
+def SDTCallProfile : SDTypeProfile<0, 6,
+                       [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+                        SDTCisVT<3, i32>, SDTCisVT<5, i32>]>;
+def call :
+  SDNode<"NVPTXISD::CALL", SDTCallProfile,
+         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+
 let mayLoad = true in {
   class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
         NVPTXInst<(outs regclass:$dst), (ins Offseti32imm:$b),
@@ -2092,11 +2067,6 @@ let mayLoad = true in {
                   []>;
 }
 
-class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
-                !strconcat("mov", opstr, " \t$dst, retval$b;"),
-                [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
-
 let mayStore = true in {
 
   multiclass StoreParamInst<NVPTXRegClass regclass, Operand IMMType, string opstr, bit support_imm = true> {
@@ -2159,23 +2129,40 @@ let mayStore = true in {
                   []>;
 }
 
-let isCall=1 in {
-  multiclass CALL<string OpcStr, SDNode OpNode> {
-     def PrintCallNoRetInst : NVPTXInst<(outs), (ins),
-       OpcStr # " ", [(OpNode 0)]>;
-     def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
-       OpcStr # " (retval0), ", [(OpNode 1)]>;
+/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
+///      NumParams, Callee, Proto, InGlue)
+
+def CallOperand : Operand<i32> { let PrintMethod = "printCallOperand"; }
+
+foreach is_convergent = [0, 1] in {
+  defvar convergent_suffix = !if(is_convergent, "_conv", "");
+
+  let isCall = 1, isConvergent = is_convergent in {
+    def CALL # convergent_suffix :
+      NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, i32imm:$proto),
+                "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
+
+    def CALL_UNI # convergent_suffix :
+      NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
+                "call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
   }
-}
 
-defm Call : CALL<"call", PrintCall>;
-defm CallUni : CALL<"call.uni", PrintCallUni>;
+  defvar inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
+  defvar inst_uni = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
 
-// Convergent call instructions.  These are identical to regular calls, except
-// they have the isConvergent bit set.
-let isConvergent=1 in {
-  defm ConvergentCall : CALL<"call", PrintConvergentCall>;
-  defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>;
+  def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto),
+            (inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
+  def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto),
+            (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+  def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto),
+            (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+
+  def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0),
+            (inst_uni (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
+  def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0),
+            (inst_uni $addr, imm:$rets, imm:$params)>;
+  def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 0),
+            (inst_uni $addr, imm:$rets, imm:$params)>;
 }
 
 def LoadParamMemI64    : LoadParamMemInst<B64, ".b64">;
@@ -2229,41 +2216,23 @@ def StoreRetvalV4I32  : StoreRetvalV4Inst<B32, ".b32">;
 def StoreRetvalV4I16  : StoreRetvalV4Inst<B16, ".b16">;
 def StoreRetvalV4I8   : StoreRetvalV4Inst<B16, ".b8">;
 
-def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
-def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
-def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
-
-def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
-                             [(CallArg 1, imm:$a)]>;
-def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
-                                 [(LastCallArg 1, imm:$a)]>;
-
-def CALL_ADDR :      NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ", []>;
-
-def : Pat<(CallVoid globaladdr:$addr), (CALL_ADDR (to_tglobaladdr $addr))>;
-def : Pat<(CallVoid i32:$addr), (CALL_ADDR $addr)>;
-def : Pat<(CallVoid i64:$addr), (CALL_ADDR $addr)>;
-
-def PrototypeInst :     NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
-                                  [(Prototype (i32 imm:$val))]>;
-
 def DeclareRetMemInst :
-  NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
-            ".param .align $align .b8 retval$num[$size];",
-            [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
+  NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size),
+            ".param .align $align .b8 retval0[$size];",
+            [(DeclareRetParam imm:$align, imm:$size)]>;
 def DeclareRetScalarInst :
-  NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
-            ".param .b$size retval$num;",
-            [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
+  NVPTXInst<(outs), (ins i32imm:$size),
+            ".param .b$size retval0;",
+            [(DeclareRet imm:$size)]>;
 
 def DeclareParamInst :
   NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
             ".param .align $align .b8 param$a[$size];",
-            [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
+            [(DeclareParam imm:$align, imm:$a, imm:$size)]>;
 def DeclareScalarParamInst :
   NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
             ".param .b$size param$a;",
-            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
+            [(DeclareScalarParam imm:$a, imm:$size)]>;
 
 foreach t = [I32RT, I64RT] in {
   defvar inst_name = "MOV" # t.Size # "_PARAM";
diff --git a/llvm/test/CodeGen/NVPTX/alias.ll b/llvm/test/CodeGen/NVPTX/alias.ll
index 8ae29b51290ef..01761c21ab103 100644
--- a/llvm/test/CodeGen/NVPTX/alias.ll
+++ b/llvm/test/CodeGen/NVPTX/alias.ll
@@ -56,8 +56,7 @@ attributes #0 = { noreturn }
 ; CHECK-NEXT: .noreturn
 
 ;      CHECK: .visible .func  (.param .b32 func_retval0) z()
-;      CHECK:      call.uni (retval0), 
-; CHECK-NEXT:      b,
+;      CHECK:      call.uni (retval0), b,
 
 
 ; CHECK: .alias b, a;
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index 6f115756a8ae7..01e4065a7baa7 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -216,12 +216,7 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
 ; CHECK-NEXT:    .param .align 4 .b8 param1[4];
 ; CHECK-NEXT:    st.param.b32 [param1], %r2;
 ; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    test_callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
index 2af1e6d7e185b..ad9e4b089e8d8 100644
--- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
@@ -19,11 +19,7 @@ define void @foo() {
 ; CHECK-NEXT:    .param .align 8 .b8 param0[16];
 ; CHECK-NEXT:    st.param.b64 [param0], %rd1;
 ; CHECK-NEXT:    st.param.b64 [param0+8], %rd2;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    bar,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni bar, (param0);
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    ret;
   call void @bar(ptr byval(%struct) @G)
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index a2175dd009f5f..0cd7058174d67 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -48,8 +48,7 @@ entry:
 ; CHECK-NEXT:   st.param.b64  [param0], %rd[[A_REG]]
 ; CHECK-NEXT:   .param .b64 param1;
 ; CHECK-NEXT:   st.param.b64  [param1], %rd[[SP_REG]]
-; CHECK-NEXT:   call.uni
-; CHECK-NEXT:   callee,
+; CHECK-NEXT:   call.uni callee,
 
   call void @callee(ptr %a, ptr %buf) #2
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll
index dc6d504c2c66c..2232810d02128 100644
--- a/llvm/test/CodeGen/NVPTX/combine-mad.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll
@@ -203,12 +203,7 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    .param .b32 param1;
 ; CHECK-NEXT:    st.param.b32 [param1], %r5;
 ; CHECK-NEXT:    .param .b32 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    use,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), use, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r6, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
diff --git a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
index 5e85bf4554546..39a2d7f9e1504 100644
--- a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
+++ b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
@@ -9,18 +9,16 @@ declare void @conv() convergent
 declare void @not_conv()
 
 define void @test(ptr %f) {
-  ; CHECK: ConvergentCallUniPrintCall
-  ; CHECK-NEXT: @conv
+  ; CHECK: CALL_UNI_conv @conv
   call void @conv()
 
-  ; CHECK: CallUniPrintCall
-  ; CHECK-NEXT: @not_conv
+  ; CHECK: CALL_UNI @not_conv
   call void @not_conv()
 
-  ; CHECK: ConvergentCallPrintCall
+  ; CHECK: CALL_conv %{{[0-9]+}}
   call void %f() convergent
 
-  ; CHECK: CallPrintCall
+  ; CHECK: CALL %{{[0-9]+}}
   call void %f()
 
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
index 71a46fa6d4820..d1b478d341915 100644
--- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
@@ -9,12 +9,7 @@ define %struct.64 @test_return_type_mismatch(ptr %p) {
 ; CHECK-LABEL: test_return_type_mismatch(
 ; CHECK:         .param .align 1 .b8 retval0[8];
 ; CHECK-NEXT:    prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT:    call (retval0),
-; CHECK-NEXT:    %rd
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    )
-; CHECK-NEXT:    , prototype_0;
+; CHECK-NEXT:    call (retval0), %rd{{[0-9]+}}, (param0), prototype_0;
   %ret = call %struct.64 @callee(ptr %p)
   ret %struct.64 %ret
 }
@@ -23,12 +18,7 @@ define i64 @test_param_type_mismatch(ptr %p) {
 ; CHECK-LABEL: test_param_type_mismatch(
 ; CHECK:         .param .b64 retval0;
 ; CHECK-NEXT:    prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _);
-; CHECK-NEXT:    call (retval0),
-; CHECK-NEXT:    %rd
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    )
-; CHECK-NEXT:    , prototype_1;
+; CHECK-NEXT:    call (retval0), %rd{{[0-9]+}}, (param0), prototype_1;
   %ret = call i64 @callee(i64 7)
   ret i64 %ret
 }
@@ -37,13 +27,7 @@ define i64 @test_param_count_mismatch(ptr %p) {
 ; CHECK-LABEL: test_param_count_mismatch(
 ; CHECK:         .param .b64 retval0;
 ; CHECK-NEXT:    prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
-; CHECK-NEXT:    call (retval0),
-; CHECK-NEXT:    %rd
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    )
-; CHECK-NEXT:    , prototype_2;
+; CHECK-NEXT:    call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2;
   %ret = call i64 @callee(ptr %p, i64 7)
   ret i64 %ret
 }
@@ -52,12 +36,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
 ; CHECK-LABEL: test_return_type_mismatch_variadic(
 ; CHECK:         .param .align 1 .b8 retval0[8];
 ; CHECK-NEXT:    prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT:    call (retval0),
-; CHECK-NEXT:    %rd
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    )
-; CHECK-NEXT:    , prototype_3;
+; CHECK-NEXT:    call (retval0), %rd{{[0-9]+}}, (param0), prototype_3;
   %ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p)
   ret %struct.64 %ret
 }
@@ -65,12 +44,7 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
 define i64 @test_param_type_mismatch_variadic(ptr %p) {
 ; CHECK-LABEL: test_param_type_mismatch_variadic(
 ; CHECK:         .param .b64 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    callee_variadic
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    )
+; CHECK-NEXT:    call.uni (retval0), callee_variadic, (param0, param1);
   %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
   ret i64 %ret
 }
@@ -78,12 +52,7 @@ define i64 @test_param_type_mismatch_variadic(ptr %p) {
 define i64 @test_param_count_mismatch_variadic(ptr %p) {
 ; CHECK-LABEL: test_param_count_mismatch_variadic(
 ; CHECK:         .param .b64 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    callee_variadic
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    )
+; CHECK-NEXT:    call.uni (retval0), callee_variadic, (param0, param1);
   %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
   ret i64 %ret
 }
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index b73aea76a4528..4d2ba7d00f872 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -24,11 +24,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
 ; CHECK-32-NEXT:    .param .b32 param0;
 ; CHECK-32-NEXT:    st.param.b32 [param0], %r5;
 ; CHECK-32-NEXT:    .param .b32 retval0;
-; CHECK-32-NEXT:    call.uni (retval0),
-; CHECK-32-NEXT:    bar,
-; CHECK-32-NEXT:    (
-; CHECK-32-NEXT:    param0
-; CHECK-32-NEXT:    );
+; CHECK-32-NEXT:    call.uni (retval0), bar, (param0);
 ; CHECK-32-NEXT:    ld.param.b32 %r6, [retval0];
 ; CHECK-32-NEXT:    } // callseq 0
 ; CHECK-32-NEXT:    st.param.b32 [func_retval0], %r6;
@@ -49,11 +45,7 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
 ; CHECK-64-NEXT:    .param .b64 param0;
 ; CHECK-64-NEXT:    st.param.b64 [param0], %rd5;
 ; CHECK-64-NEXT:    .param .b32 retval0;
-; CHECK-64-NEXT:    call.uni (retval0),
-; CHECK-64-NEXT:    bar,
-; CHECK-64-NEXT:    (
-; CHECK-64-NEXT:    param0
-; CHECK-64-NEXT:    );
+; CHECK-64-NEXT:    call.uni (retval0), bar, (param0);
 ; CHECK-64-NEXT:    ld.param.b32 %r1, [retval0];
 ; CHECK-64-NEXT:    } // callseq 0
 ; CHECK-64-NEXT:    st.param.b32 [func_retval0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index c905fc04ce780..252edf4b02c76 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -263,12 +263,7 @@ declare half @test_callee(half %a, half %b) #0
 ; CHECK-DAG:  st.param.b16    [param0], [[A]];
 ; CHECK-DAG:  st.param.b16    [param1], [[B]];
 ; CHECK-DAG:  .param .align 2 .b8 retval0[2];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT:        test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT:        param0,
-; CHECK-NEXT:        param1
-; CHECK-NEXT: );
+; CHECK:      call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b16    [[R:%rs[0-9]+]], [retval0];
 ; CHECK-NEXT: }
 ; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
@@ -287,12 +282,7 @@ define half @test_call(half %a, half %b) #0 {
 ; CHECK-DAG:  st.param.b16    [param0], [[B]];
 ; CHECK-DAG:  st.param.b16    [param1], [[A]];
 ; CHECK-DAG:  .param .align 2 .b8 retval0[2];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT:        test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT:        param0,
-; CHECK-NEXT:        param1
-; CHECK-NEXT: );
+; CHECK:      call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b16    [[R:%rs[0-9]+]], [retval0];
 ; CHECK-NEXT: }
 ; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
@@ -311,12 +301,7 @@ define half @test_call_flipped(half %a, half %b) #0 {
 ; CHECK-DAG:  st.param.b16    [param0], [[B]];
 ; CHECK-DAG:  st.param.b16    [param1], [[A]];
 ; CHECK-DAG:  .param .align 2 .b8 retval0[2];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT:        test_callee,
-; CHECK-NEXT: (
-; CHECK-NEXT:        param0,
-; CHECK-NEXT:        param1
-; CHECK-NEXT: );
+; CHECK:      call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b16    [[R:%rs[0-9]+]], [retval0];
 ; CHECK-NEXT: }
 ; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
@@ -650,8 +635,7 @@ else:
 ; CHECK:      ld.b16  [[AB:%rs[0-9]+]], [%[[P1]]];
 ; CHECK:      {
 ; CHECK:      st.param.b64    [param0], %[[P1]];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_dummy
+; CHECK:      call.uni (retval0), test_dummy
 ; CHECK:      }
 ; CHECK:      setp.ne.b32     [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 0;
 ; CHECK:      @[[PRED]] bra   [[LOOP]];
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index fc7f53c5fdca3..8da2c1d1ebac2 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -467,12 +467,7 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
 ; CHECK-NEXT:    .param .align 4 .b8 param1[4];
 ; CHECK-NEXT:    st.param.b32 [param1], %r2;
 ; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    test_callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
@@ -495,12 +490,7 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
 ; CHECK-NEXT:    .param .align 4 .b8 param1[4];
 ; CHECK-NEXT:    st.param.b32 [param1], %r1;
 ; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    test_callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
@@ -523,12 +513,7 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
 ; CHECK-NEXT:    .param .align 4 .b8 param1[4];
 ; CHECK-NEXT:    st.param.b32 [param1], %r1;
 ; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    test_callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT:    } // callseq 2
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index 327851725991e..b74e531adba3f 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -40,12 +40,7 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
 ; CHECK-NEXT:    .param .b32 param1;
 ; CHECK-NEXT:    st.param.b32 [param1], %r6;
 ; CHECK-NEXT:    .param .b32 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    dummy_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), dummy_f32, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r7, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
@@ -92,12 +87,7 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
 ; CHECK-NEXT:    .param .b64 param1;
 ; CHECK-NEXT:    st.param.b64 [param1], %rd6;
 ; CHECK-NEXT:    .param .b64 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    dummy_f64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), dummy_f64, (param0, param1);
 ; CHECK-NEXT:    ld.param.b64 %rd7, [retval0];
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index d253df5ed1b9c..ed8f6b4511079 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -50,11 +50,7 @@ define void @test_ld_param_escaping(ptr byval(i32) %a) {
 ; CHECK-NEXT:    { // callseq 0, 0
 ; CHECK-NEXT:    .param .b64 param0;
 ; CHECK-NEXT:    st.param.b64 [param0], %rd2;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    escape,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni escape, (param0);
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    ret;
   call void @escape(ptr %a)
@@ -72,11 +68,7 @@ define void @test_ld_param_byval(ptr byval(i32) %a) {
 ; CHECK-NEXT:    { // callseq 1, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.b32 [param0], %r1;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    byval_user,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni byval_user, (param0);
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    ret;
   call void @byval_user(ptr %a)
diff --git a/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll b/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll
index d40f514acd408..de69d02ded5e4 100644
--- a/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll
+++ b/llvm/test/CodeGen/NVPTX/fp128-storage-type.ll
@@ -42,11 +42,7 @@ define void @call(fp128 %x) {
 ; CHECK-NEXT:    { // callseq 0, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {%rd1, %rd2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call, (param0);
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    ret;
   call void @call(fp128 %x)
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index bf1fb06c44688..d5ddadf2b21c5 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -647,12 +647,7 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
 ; COMMON-NEXT:    .param .align 4 .b8 param1[4];
 ; COMMON-NEXT:    st.param.b32 [param1], %r2;
 ; COMMON-NEXT:    .param .align 4 .b8 retval0[4];
-; COMMON-NEXT:    call.uni (retval0),
-; COMMON-NEXT:    test_callee,
-; COMMON-NEXT:    (
-; COMMON-NEXT:    param0,
-; COMMON-NEXT:    param1
-; COMMON-NEXT:    );
+; COMMON-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; COMMON-NEXT:    ld.param.b32 %r3, [retval0];
 ; COMMON-NEXT:    } // callseq 0
 ; COMMON-NEXT:    st.param.b32 [func_retval0], %r3;
@@ -675,12 +670,7 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
 ; COMMON-NEXT:    .param .align 4 .b8 param1[4];
 ; COMMON-NEXT:    st.param.b32 [param1], %r1;
 ; COMMON-NEXT:    .param .align 4 .b8 retval0[4];
-; COMMON-NEXT:    call.uni (retval0),
-; COMMON-NEXT:    test_callee,
-; COMMON-NEXT:    (
-; COMMON-NEXT:    param0,
-; COMMON-NEXT:    param1
-; COMMON-NEXT:    );
+; COMMON-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; COMMON-NEXT:    ld.param.b32 %r3, [retval0];
 ; COMMON-NEXT:    } // callseq 1
 ; COMMON-NEXT:    st.param.b32 [func_retval0], %r3;
@@ -703,12 +693,7 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
 ; COMMON-NEXT:    .param .align 4 .b8 param1[4];
 ; COMMON-NEXT:    st.param.b32 [param1], %r1;
 ; COMMON-NEXT:    .param .align 4 .b8 retval0[4];
-; COMMON-NEXT:    call.uni (retval0),
-; COMMON-NEXT:    test_callee,
-; COMMON-NEXT:    (
-; COMMON-NEXT:    param0,
-; COMMON-NEXT:    param1
-; COMMON-NEXT:    );
+; COMMON-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; COMMON-NEXT:    ld.param.b32 %r3, [retval0];
 ; COMMON-NEXT:    } // callseq 2
 ; COMMON-NEXT:    st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index 7cc7468bc7de7..72c279bee4268 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -833,12 +833,7 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
 ; CHECK-NEXT:    .param .align 4 .b8 param1[4];
 ; CHECK-NEXT:    st.param.b32 [param1], %r2;
 ; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    test_callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
@@ -861,12 +856,7 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
 ; CHECK-NEXT:    .param .align 4 .b8 param1[4];
 ; CHECK-NEXT:    st.param.b32 [param1], %r1;
 ; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    test_callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
@@ -889,12 +879,7 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
 ; CHECK-NEXT:    .param .align 4 .b8 param1[4];
 ; CHECK-NEXT:    st.param.b32 [param1], %r1;
 ; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    test_callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT:    } // callseq 2
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index 1341a04c939c6..eae0321433946 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -33,13 +33,7 @@ define internal i32 @foo() {
 ; CHECK-NEXT:    st.param.b64 [param1], %rd4;
 ; CHECK-NEXT:    .param .b32 retval0;
 ; CHECK-NEXT:    prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
-; CHECK-NEXT:    call (retval0),
-; CHECK-NEXT:    %rd1,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    )
-; CHECK-NEXT:    , prototype_0;
+; CHECK-NEXT:    call (retval0), %rd1, (param0, param1), prototype_0;
 ; CHECK-NEXT:    ld.param.b32 %r1, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
@@ -76,13 +70,7 @@ define internal i32 @bar() {
 ; CHECK-NEXT:    st.param.b64 [param1], %rd5;
 ; CHECK-NEXT:    .param .b32 retval0;
 ; CHECK-NEXT:    prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
-; CHECK-NEXT:    call (retval0),
-; CHECK-NEXT:    %rd1,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0,
-; CHECK-NEXT:    param1
-; CHECK-NEXT:    )
-; CHECK-NEXT:    , prototype_1;
+; CHECK-NEXT:    call (retval0), %rd1, (param0, param1), prototype_1;
 ; CHECK-NEXT:    ld.param.b32 %r1, [retval0];
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
index 419c780f7d82a..9e9705709f2bd 100644
--- a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
+++ b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
@@ -14,10 +14,7 @@ define void @foo(ptr %ptr) {
 ; CHECK-NEXT:    ld.param.b64 %rd1, [foo_param_0];
 ; CHECK-NEXT:    { // callseq 0, 0
 ; CHECK-NEXT:    .param .align 16 .b8 retval0[16];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    bar,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), bar, ();
 ; CHECK-NEXT:    ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.v4.b32 [%rd1], {%r1, %r2, %r3, %r4};
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 2bfd891a04a17..a9004d00e7807 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -58,11 +58,7 @@ define ptx_kernel void @foo2(i32 %a) {
 ; PTX32-NEXT:    { // callseq 0, 0
 ; PTX32-NEXT:    .param .b32 param0;
 ; PTX32-NEXT:    st.param.b32 [param0], %r2;
-; PTX32-NEXT:    call.uni
-; PTX32-NEXT:    bar,
-; PTX32-NEXT:    (
-; PTX32-NEXT:    param0
-; PTX32-NEXT:    );
+; PTX32-NEXT:    call.uni bar, (param0);
 ; PTX32-NEXT:    } // callseq 0
 ; PTX32-NEXT:    ret;
 ;
@@ -84,11 +80,7 @@ define ptx_kernel void @foo2(i32 %a) {
 ; PTX64-NEXT:    { // callseq 0, 0
 ; PTX64-NEXT:    .param .b64 param0;
 ; PTX64-NEXT:    st.param.b64 [param0], %rd1;
-; PTX64-NEXT:    call.uni
-; PTX64-NEXT:    bar,
-; PTX64-NEXT:    (
-; PTX64-NEXT:    param0
-; PTX64-NEXT:    );
+; PTX64-NEXT:    call.uni bar, (param0);
 ; PTX64-NEXT:    } // callseq 0
 ; PTX64-NEXT:    ret;
   %local = alloca i32, align 4
@@ -159,20 +151,12 @@ define void @foo4() {
 ; PTX32-NEXT:    { // callseq 1, 0
 ; PTX32-NEXT:    .param .b32 param0;
 ; PTX32-NEXT:    st.param.b32 [param0], %r1;
-; PTX32-NEXT:    call.uni
-; PTX32-NEXT:    bar,
-; PTX32-NEXT:    (
-; PTX32-NEXT:    param0
-; PTX32-NEXT:    );
+; PTX32-NEXT:    call.uni bar, (param0);
 ; PTX32-NEXT:    } // callseq 1
 ; PTX32-NEXT:    { // callseq 2, 0
 ; PTX32-NEXT:    .param .b32 param0;
 ; PTX32-NEXT:    st.param.b32 [param0], %r3;
-; PTX32-NEXT:    call.uni
-; PTX32-NEXT:    bar,
-; PTX32-NEXT:    (
-; PTX32-NEXT:    param0
-; PTX32-NEXT:    );
+; PTX32-NEXT:    call.uni bar, (param0);
 ; PTX32-NEXT:    } // callseq 2
 ; PTX32-NEXT:    ret;
 ;
@@ -197,20 +181,12 @@ define void @foo4() {
 ; PTX64-NEXT:    { // callseq 1, 0
 ; PTX64-NEXT:    .param .b64 param0;
 ; PTX64-NEXT:    st.param.b64 [param0], %rd1;
-; PTX64-NEXT:    call.uni
-; PTX64-NEXT:    bar,
-; PTX64-NEXT:    (
-; PTX64-NEXT:    param0
-; PTX64-NEXT:    );
+; PTX64-NEXT:    call.uni bar, (param0);
 ; PTX64-NEXT:    } // callseq 1
 ; PTX64-NEXT:    { // callseq 2, 0
 ; PTX64-NEXT:    .param .b64 param0;
 ; PTX64-NEXT:    st.param.b64 [param0], %rd3;
-; PTX64-NEXT:    call.uni
-; PTX64-NEXT:    bar,
-; PTX64-NEXT:    (
-; PTX64-NEXT:    param0
-; PTX64-NEXT:    );
+; PTX64-NEXT:    call.uni bar, (param0);
 ; PTX64-NEXT:    } // callseq 2
 ; PTX64-NEXT:    ret;
   %A = alloca i32
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index c3f94455b3038..0a2cd81ac904c 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -133,12 +133,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
 ; PTX-NEXT:    st.param.b64 [param0], %rd3;
 ; PTX-NEXT:    .param .b32 retval0;
 ; PTX-NEXT:    prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT:    call (retval0),
-; PTX-NEXT:    %rd1,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    )
-; PTX-NEXT:    , prototype_0;
+; PTX-NEXT:    call (retval0), %rd1, (param0), prototype_0;
 ; PTX-NEXT:    ld.param.b32 %r1, [retval0];
 ; PTX-NEXT:    } // callseq 0
 ; PTX-NEXT:    ret;
@@ -182,14 +177,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
 ; PTX-NEXT:    st.param.b64 [param2], %rd4;
 ; PTX-NEXT:    .param .b32 retval0;
 ; PTX-NEXT:    prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
-; PTX-NEXT:    call (retval0),
-; PTX-NEXT:    %rd1,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0,
-; PTX-NEXT:    param1,
-; PTX-NEXT:    param2
-; PTX-NEXT:    )
-; PTX-NEXT:    , prototype_1;
+; PTX-NEXT:    call (retval0), %rd1, (param0, param1, param2), prototype_1;
 ; PTX-NEXT:    ld.param.b32 %r2, [retval0];
 ; PTX-NEXT:    } // callseq 1
 ; PTX-NEXT:    ret;
@@ -284,12 +272,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
 ; PTX-NEXT:    st.param.b64 [param0], %rd5;
 ; PTX-NEXT:    .param .b32 retval0;
 ; PTX-NEXT:    prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT:    call (retval0),
-; PTX-NEXT:    %rd1,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    )
-; PTX-NEXT:    , prototype_2;
+; PTX-NEXT:    call (retval0), %rd1, (param0), prototype_2;
 ; PTX-NEXT:    ld.param.b32 %r3, [retval0];
 ; PTX-NEXT:    } // callseq 2
 ; PTX-NEXT:    ret;
@@ -330,12 +313,7 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
 ; PTX-NEXT:    st.param.b64 [param0], %rd5;
 ; PTX-NEXT:    .param .b32 retval0;
 ; PTX-NEXT:    prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT:    call (retval0),
-; PTX-NEXT:    %rd1,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    )
-; PTX-NEXT:    , prototype_3;
+; PTX-NEXT:    call (retval0), %rd1, (param0), prototype_3;
 ; PTX-NEXT:    ld.param.b32 %r4, [retval0];
 ; PTX-NEXT:    } // callseq 3
 ; PTX-NEXT:    st.param.b32 [func_retval0], %r3;
@@ -561,11 +539,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
 ; PTX-NEXT:    { // callseq 4, 0
 ; PTX-NEXT:    .param .align 4 .b8 param0[4];
 ; PTX-NEXT:    st.param.b32 [param0], %r1;
-; PTX-NEXT:    call.uni
-; PTX-NEXT:    device_func,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    );
+; PTX-NEXT:    call.uni device_func, (param0);
 ; PTX-NEXT:    } // callseq 4
 ; PTX-NEXT:    ret;
   call void @device_func(ptr byval(i32) align 4 %input)
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 246408ecf6a3a..6f334b075241b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes IR,IRC
 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes IR,IRO
 ; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX,PTXC
@@ -47,11 +47,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
 ; PTX-NEXT:    .param .b64 param0;
 ; PTX-NEXT:    st.param.b64 [param0], %rd5;
 ; PTX-NEXT:    .param .b64 retval0;
-; PTX-NEXT:    call.uni (retval0),
-; PTX-NEXT:    escape,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    );
+; PTX-NEXT:    call.uni (retval0), escape, (param0);
 ; PTX-NEXT:    ld.param.b64 %rd6, [retval0];
 ; PTX-NEXT:    } // callseq 0
 ; PTX-NEXT:    ret;
@@ -89,11 +85,7 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
 ; PTX-NEXT:    .param .b64 param0;
 ; PTX-NEXT:    st.param.b64 [param0], %rd2;
 ; PTX-NEXT:    .param .b64 retval0;
-; PTX-NEXT:    call.uni (retval0),
-; PTX-NEXT:    escape,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    );
+; PTX-NEXT:    call.uni (retval0), escape, (param0);
 ; PTX-NEXT:    ld.param.b64 %rd3, [retval0];
 ; PTX-NEXT:    } // callseq 1
 ; PTX-NEXT:    ret;
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 54495cf0d61f3..d268562914755 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -153,11 +153,7 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
 ; PTX-NEXT:    { // callseq 0, 0
 ; PTX-NEXT:    .param .b64 param0;
 ; PTX-NEXT:    st.param.b64 [param0], %rd1;
-; PTX-NEXT:    call.uni
-; PTX-NEXT:    _Z6escapePv,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    );
+; PTX-NEXT:    call.uni _Z6escapePv, (param0);
 ; PTX-NEXT:    } // callseq 0
 ; PTX-NEXT:    ret;
 entry:
@@ -198,11 +194,7 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
 ; PTX-NEXT:    { // callseq 1, 0
 ; PTX-NEXT:    .param .b64 param0;
 ; PTX-NEXT:    st.param.b64 [param0], %rd3;
-; PTX-NEXT:    call.uni
-; PTX-NEXT:    _Z6escapePv,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    );
+; PTX-NEXT:    call.uni _Z6escapePv, (param0);
 ; PTX-NEXT:    } // callseq 1
 ; PTX-NEXT:    ret;
 entry:
@@ -902,11 +894,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
 ; PTX-NEXT:    { // callseq 2, 0
 ; PTX-NEXT:    .param .align 4 .b8 param0[4];
 ; PTX-NEXT:    st.param.b32 [param0], %r1;
-; PTX-NEXT:    call.uni
-; PTX-NEXT:    device_func,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    );
+; PTX-NEXT:    call.uni device_func, (param0);
 ; PTX-NEXT:    } // callseq 2
 ; PTX-NEXT:    ret;
   call void @device_func(ptr byval(i32) align 4 %input)
@@ -929,11 +917,7 @@ define void @device_func(ptr byval(i32) align 4 %input) {
 ; PTX-NEXT:    { // callseq 3, 0
 ; PTX-NEXT:    .param .align 4 .b8 param0[4];
 ; PTX-NEXT:    st.param.b32 [param0], %r1;
-; PTX-NEXT:    call.uni
-; PTX-NEXT:    device_func,
-; PTX-NEXT:    (
-; PTX-NEXT:    param0
-; PTX-NEXT:    );
+; PTX-NEXT:    call.uni device_func, (param0);
 ; PTX-NEXT:    } // callseq 3
 ; PTX-NEXT:    ret;
   call void @device_func(ptr byval(i32) align 4 %input)
diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
index 7e907990147a5..2e9eb6913ac0e 100644
--- a/llvm/test/CodeGen/NVPTX/misched_func_call.ll
+++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
@@ -21,11 +21,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT:    .param .b64 param0;
 ; CHECK-NEXT:    st.param.b64 [param0], 0d0000000000000000;
 ; CHECK-NEXT:    .param .b64 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    quux,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), quux, (param0);
 ; CHECK-NEXT:    ld.param.b64 %rd1, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    mul.lo.s32 %r7, %r10, %r3;
diff --git a/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll
index a1f0577c2218b..448960181ae42 100644
--- a/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll
+++ b/llvm/test/CodeGen/NVPTX/naked-fn-with-frame-pointer.ll
@@ -11,10 +11,7 @@ define dso_local void @naked() naked "frame-pointer"="all" {
 ; CHECK-32-EMPTY:
 ; CHECK-32-NEXT:  // %bb.0:
 ; CHECK-32-NEXT:    { // callseq 0, 0
-; CHECK-32-NEXT:    call.uni
-; CHECK-32-NEXT:    main,
-; CHECK-32-NEXT:    (
-; CHECK-32-NEXT:    );
+; CHECK-32-NEXT:    call.uni main, ();
 ; CHECK-32-NEXT:    } // callseq 0
 ; CHECK-32-NEXT:    // begin inline asm
 ; CHECK-32-NEXT:    exit;
@@ -26,10 +23,7 @@ define dso_local void @naked() naked "frame-pointer"="all" {
 ; CHECK-64-EMPTY:
 ; CHECK-64-NEXT:  // %bb.0:
 ; CHECK-64-NEXT:    { // callseq 0, 0
-; CHECK-64-NEXT:    call.uni
-; CHECK-64-NEXT:    main,
-; CHECK-64-NEXT:    (
-; CHECK-64-NEXT:    );
+; CHECK-64-NEXT:    call.uni main, ();
 ; CHECK-64-NEXT:    } // callseq 0
 ; CHECK-64-NEXT:    // begin inline asm
 ; CHECK-64-NEXT:    exit;
@@ -45,10 +39,7 @@ define dso_local void @normal() "frame-pointer"="all" {
 ; CHECK-32-EMPTY:
 ; CHECK-32-NEXT:  // %bb.0:
 ; CHECK-32-NEXT:    { // callseq 1, 0
-; CHECK-32-NEXT:    call.uni
-; CHECK-32-NEXT:    main,
-; CHECK-32-NEXT:    (
-; CHECK-32-NEXT:    );
+; CHECK-32-NEXT:    call.uni main, ();
 ; CHECK-32-NEXT:    } // callseq 1
 ; CHECK-32-NEXT:    // begin inline asm
 ; CHECK-32-NEXT:    exit;
@@ -60,10 +51,7 @@ define dso_local void @normal() "frame-pointer"="all" {
 ; CHECK-64-EMPTY:
 ; CHECK-64-NEXT:  // %bb.0:
 ; CHECK-64-NEXT:    { // callseq 1, 0
-; CHECK-64-NEXT:    call.uni
-; CHECK-64-NEXT:    main,
-; CHECK-64-NEXT:    (
-; CHECK-64-NEXT:    );
+; CHECK-64-NEXT:    call.uni main, ();
 ; CHECK-64-NEXT:    } // callseq 1
 ; CHECK-64-NEXT:    // begin inline asm
 ; CHECK-64-NEXT:    exit;
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index 4fc8786c1e2fe..cd2664e913824 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -37,11 +37,7 @@ define i32 @test(%struct.1float alignstack(32) %data) {
 ; CHECK-NEXT:    st.param.b8 [param0+2], %r12;
 ; CHECK-NEXT:    st.param.b8 [param0+3], %r13;
 ; CHECK-NEXT:    .param .b32 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), callee, (param0);
 ; CHECK-NEXT:    ld.param.b32 %r14, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r14;
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index 4bea710e6dd93..263477df1dbfe 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -32,8 +32,7 @@
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], [[C]]
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni
-; CHECK-NEXT: test_i1,
+; CHECK:      call.uni (retval0), test_i1,
 ; CHECK:      ld.param.b32    [[R8:%r[0-9]+]], [retval0];
 ; CHECK:      and.b32         [[R:%r[0-9]+]], [[R8]], 1;
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
@@ -76,8 +75,7 @@ define signext i1 @test_i1s(i1 signext %a) {
 ; CHECK-DAG:  st.param.b8     [param0], [[E0]];
 ; CHECK-DAG:  st.param.b8     [param0+2], [[E2]];
 ; CHECK:      .param .align 1 .b8 retval0[1];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v3i1,
+; CHECK:      call.uni (retval0), test_v3i1,
 ; CHECK-DAG:  ld.param.b8     [[RE0:%rs[0-9]+]], [retval0];
 ; CHECK-DAG:  ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+2];
 ; CHECK-DAG:  st.param.b8     [func_retval0], [[RE0]]
@@ -95,8 +93,7 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) {
 ; CHECK:      .param .align 1 .b8 param0[1];
 ; CHECK:      st.param.b8  [param0], [[E0]];
 ; CHECK:      .param .align 1 .b8 retval0[1];
-; CHECK:      call.uni (retval0),
-; CHECK:      test_v4i1,
+; CHECK:      call.uni (retval0), test_v4i1,
 ; CHECK:      ld.param.b8  [[RE0:%rs[0-9]+]], [retval0];
 ; CHECK:      ld.param.b8  [[RE1:%rs[0-9]+]], [retval0+1];
 ; CHECK:      ld.param.b8  [[RE2:%rs[0-9]+]], [retval0+2];
@@ -120,8 +117,7 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) {
 ; CHECK-DAG:  st.param.b8     [param0], [[E0]];
 ; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
 ; CHECK:      .param .align 1 .b8 retval0[1];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v5i1,
+; CHECK:      call.uni (retval0), test_v5i1,
 ; CHECK-DAG:  ld.param.b8  [[RE0:%rs[0-9]+]], [retval0];
 ; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
 ; CHECK-DAG:  st.param.b8  [func_retval0], [[RE0]]
@@ -139,8 +135,7 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK:      test_i2,
+; CHECK:      call.uni (retval0), test_i2,
 ; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
 ; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -156,8 +151,7 @@ define i2 @test_i2(i2 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK:      test_i3,
+; CHECK:      call.uni (retval0), test_i3,
 ; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
 ; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -176,8 +170,7 @@ define i3 @test_i3(i3 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], [[A]];
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK:      test_i8,
+; CHECK:      call.uni (retval0), test_i8,
 ; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0];
 ; CHECK:      and.b32         [[R:%r[0-9]+]], [[R32]], 255;
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
@@ -196,8 +189,7 @@ define i8 @test_i8(i8 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], [[A]];
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK:      test_i8s,
+; CHECK:      call.uni (retval0), test_i8s,
 ; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0];
 ; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
 ; CHECK:      cvt.u16.u32     [[R16:%rs[0-9]+]], [[R32]];
@@ -216,8 +208,7 @@ define signext i8 @test_i8s(i8 signext %a) {
 ; CHECK:      .param .align 4 .b8 param0[4];
 ; CHECK:      st.param.b32  [param0], [[R]]
 ; CHECK:      .param .align 4 .b8 retval0[4];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v3i8,
+; CHECK:      call.uni (retval0), test_v3i8,
 ; CHECK:      ld.param.b32  [[RE:%r[0-9]+]], [retval0];
 ; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
 ; interesting here, so it's skipped.
@@ -235,8 +226,7 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) {
 ; CHECK:      .param .align 4 .b8 param0[4];
 ; CHECK:      st.param.b32  [param0], [[R]];
 ; CHECK:      .param .align 4 .b8 retval0[4];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v4i8,
+; CHECK:      call.uni (retval0), test_v4i8,
 ; CHECK:      ld.param.b32  [[RET:%r[0-9]+]], [retval0];
 ; CHECK:      st.param.b32  [func_retval0], [[RET]];
 ; CHECK-NEXT: ret;
@@ -254,8 +244,7 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
 ; CHECK-DAG:  st.param.v4.b8  [param0], 
 ; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
 ; CHECK:      .param .align 8 .b8 retval0[8];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v5i8,
+; CHECK:      call.uni (retval0), test_v5i8,
 ; CHECK-DAG:  ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
 ; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
 ; CHECK-DAG:  st.param.v4.b8  [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -272,8 +261,7 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) {
 ; CHECK:      ld.param.b16    {{%rs[0-9]+}}, [test_i11_param_0];
 ; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i11,
+; CHECK:      call.uni (retval0), test_i11,
 ; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
 ; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -290,8 +278,7 @@ define i11 @test_i11(i11 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], [[E32]];
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i16,
+; CHECK:      call.uni (retval0), test_i16,
 ; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0];
 ; CHECK:      and.b32         [[R:%r[0-9]+]], [[RE32]], 65535;
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
@@ -309,8 +296,7 @@ define i16 @test_i16(i16 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], [[E32]];
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i16s,
+; CHECK:      call.uni (retval0), test_i16s,
 ; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0];
 ; CHECK:      cvt.s32.s16     [[R:%r[0-9]+]], [[RE32]];
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
@@ -329,8 +315,7 @@ define signext i16 @test_i16s(i16 signext %a) {
 ; CHECK:      st.param.v2.b16 [param0], {[[E0]], [[E1]]};
 ; CHECK:      st.param.b16    [param0+4], [[E2]];
 ; CHECK:      .param .align 8 .b8 retval0[8];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v3i16,
+; CHECK:      call.uni (retval0), test_v3i16,
 ; CHECK:      ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
 ; CHECK:      ld.param.b16    [[RE2:%rs[0-9]+]], [retval0+4];
 ; CHECK-DAG:  st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
@@ -348,8 +333,7 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) {
 ; CHECK:      .param .align 8 .b8 param0[8];
 ; CHECK:      st.param.v2.b32 [param0], {[[E0]], [[E1]]};
 ; CHECK:      .param .align 8 .b8 retval0[8];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v4i16,
+; CHECK:      call.uni (retval0), test_v4i16,
 ; CHECK:      ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK:      st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
 ; CHECK-NEXT: ret;
@@ -367,8 +351,7 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
 ; CHECK-DAG:  st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
 ; CHECK:      .param .align 16 .b8 retval0[16];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v5i16,
+; CHECK:      call.uni (retval0), test_v5i16,
 ; CHECK-DAG:  ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
 ; CHECK-DAG:  ld.param.b16    [[RE4:%rs[0-9]+]], [retval0+8];
 ; CHECK-DAG:  st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -386,8 +369,7 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
 ; CHECK:      .param .align 2 .b8 param0[2];
 ; CHECK:      st.param.b16    [param0], [[E]];
 ; CHECK:      .param .align 2 .b8 retval0[2];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_f16,
+; CHECK:      call.uni (retval0), test_f16,
 ; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
 ; CHECK:      st.param.b16    [func_retval0], [[R]]
 ; CHECK-NEXT: ret;
@@ -403,8 +385,7 @@ define half @test_f16(half %a) {
 ; CHECK:      .param .align 4 .b8 param0[4];
 ; CHECK:      st.param.b32    [param0], [[E]];
 ; CHECK:      .param .align 4 .b8 retval0[4];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v2f16,
+; CHECK:      call.uni (retval0), test_v2f16,
 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
 ; CHECK:      st.param.b32    [func_retval0], [[R]]
 ; CHECK-NEXT: ret;
@@ -420,8 +401,7 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
 ; CHECK:      .param .align 2 .b8 param0[2];
 ; CHECK:      st.param.b16    [param0], [[E]];
 ; CHECK:      .param .align 2 .b8 retval0[2];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_bf16,
+; CHECK:      call.uni (retval0), test_bf16,
 ; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
 ; CHECK:      st.param.b16    [func_retval0], [[R]]
 ; CHECK-NEXT: ret;
@@ -437,8 +417,7 @@ define bfloat @test_bf16(bfloat %a) {
 ; CHECK:      .param .align 4 .b8 param0[4];
 ; CHECK:      st.param.b32    [param0], [[E]];
 ; CHECK:      .param .align 4 .b8 retval0[4];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v2bf16,
+; CHECK:      call.uni (retval0), test_v2bf16,
 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
 ; CHECK:      st.param.b32    [func_retval0], [[R]]
 ; CHECK-NEXT: ret;
@@ -457,8 +436,7 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
 ; CHECK-DAG:  st.param.v2.b16 [param0], {[[E0]], [[E1]]};
 ; CHECK-DAG:  st.param.b16    [param0+4], [[E2]];
 ; CHECK:      .param .align 8 .b8 retval0[8];
-; CHECK:      call.uni (retval0),
-; CHECK:      test_v3f16,
+; CHECK:      call.uni (retval0),      test_v3f16,
 ; CHECK-DAG:  ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
 ; CHECK-DAG:  ld.param.b16    [[R2:%rs[0-9]+]], [retval0+4];
 ; CHECK-DAG:  st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
@@ -476,8 +454,7 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
 ; CHECK:      .param .align 8 .b8 param0[8];
 ; CHECK:      st.param.v2.b32 [param0], {[[R01]], [[R23]]};
 ; CHECK:      .param .align 8 .b8 retval0[8];
-; CHECK:      call.uni (retval0),
-; CHECK:      test_v4f16,
+; CHECK:      call.uni (retval0),      test_v4f16,
 ; CHECK:      ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
 ; CHECK:      st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
 ; CHECK:      ret;
@@ -495,8 +472,7 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
 ; CHECK-DAG:  st.param.v4.b16 [param0],
 ; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
 ; CHECK:      .param .align 16 .b8 retval0[16];
-; CHECK:      call.uni (retval0),
-; CHECK:      test_v5f16,
+; CHECK:      call.uni (retval0),      test_v5f16,
 ; CHECK-DAG:  ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
 ; CHECK-DAG:  ld.param.b16    [[R4:%rs[0-9]+]], [retval0+8];
 ; CHECK-DAG:  st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
@@ -514,8 +490,7 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
 ; CHECK:      .param .align 16 .b8 param0[16];
 ; CHECK:      st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
 ; CHECK:      .param .align 16 .b8 retval0[16];
-; CHECK:      call.uni (retval0),
-; CHECK:      test_v8f16,
+; CHECK:      call.uni (retval0), test_v8f16,
 ; CHECK:      ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
 ; CHECK:      st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
 ; CHECK:      ret;
@@ -535,8 +510,7 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
 ; CHECK-DAG:  st.param.v4.b16 [param0+8],
 ; CHECK-DAG:  st.param.b16    [param0+16], [[E8]];
 ; CHECK:      .param .align 32 .b8 retval0[32];
-; CHECK:      call.uni (retval0),
-; CHECK:      test_v9f16,
+; CHECK:      call.uni (retval0), test_v9f16,
 ; CHECK-DAG:  ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
 ; CHECK-DAG:  ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
 ; CHECK-DAG:  ld.param.b16    [[R8:%rs[0-9]+]], [retval0+16];
@@ -557,8 +531,7 @@ define <9 x half> @test_v9f16(<9 x half> %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i19,
+; CHECK:      call.uni (retval0), test_i19,
 ; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
 ; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -575,8 +548,7 @@ define i19 @test_i19(i19 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i23,
+; CHECK:      call.uni (retval0), test_i23,
 ; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
 ; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -593,8 +565,7 @@ define i23 @test_i23(i23 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i24,
+; CHECK:      call.uni (retval0), test_i24,
 ; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
 ; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -610,8 +581,7 @@ define i24 @test_i24(i24 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i29,
+; CHECK:      call.uni (retval0), test_i29,
 ; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
 ; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -627,8 +597,7 @@ define i29 @test_i29(i29 %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], [[E]];
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i32,
+; CHECK:      call.uni (retval0), test_i32,
 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -646,8 +615,7 @@ define i32 @test_i32(i32 %a) {
 ; CHECK:      st.param.v2.b32  [param0], {[[E0]], [[E1]]};
 ; CHECK:      st.param.b32     [param0+8], [[E2]];
 ; CHECK:      .param .align 16 .b8 retval0[16];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v3i32,
+; CHECK:      call.uni (retval0), test_v3i32,
 ; CHECK:      ld.param.v2.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK:      ld.param.b32     [[RE2:%r[0-9]+]], [retval0+8];
 ; CHECK-DAG:  st.param.v2.b32  [func_retval0], {[[RE0]], [[RE1]]};
@@ -665,8 +633,7 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) {
 ; CHECK:      .param .align 16 .b8 param0[16];
 ; CHECK:      st.param.v4.b32  [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK:      .param .align 16 .b8 retval0[16];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v4i32,
+; CHECK:      call.uni (retval0), test_v4i32,
 ; CHECK:      ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
 ; CHECK:      st.param.v4.b32  [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
 ; CHECK-NEXT: ret;
@@ -684,8 +651,7 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
 ; CHECK-DAG:  st.param.v4.b32  [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK-DAG:  st.param.b32     [param0+16], [[E4]];
 ; CHECK:      .param .align 32 .b8 retval0[32];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v5i32,
+; CHECK:      call.uni (retval0), test_v5i32,
 ; CHECK-DAG:  ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
 ; CHECK-DAG:  ld.param.b32     [[RE4:%r[0-9]+]], [retval0+16];
 ; CHECK-DAG:  st.param.v4.b32  [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -703,8 +669,7 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) {
 ; CHECK:      .param .b32 param0;
 ; CHECK:      st.param.b32    [param0], [[E]];
 ; CHECK:      .param .b32 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_f32,
+; CHECK:      call.uni (retval0), test_f32,
 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -721,8 +686,7 @@ define float @test_f32(float %a) {
 ; CHECK:      .param .b64 param0;
 ; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
 ; CHECK:      .param .b64 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i40,
+; CHECK:      call.uni (retval0), test_i40,
 ; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
 ; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -739,8 +703,7 @@ define i40 @test_i40(i40 %a) {
 ; CHECK:      .param .b64 param0;
 ; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
 ; CHECK:      .param .b64 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i47,
+; CHECK:      call.uni (retval0), test_i47,
 ; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
 ; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -757,8 +720,7 @@ define i47 @test_i47(i47 %a) {
 ; CHECK:      .param .b64 param0;
 ; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
 ; CHECK:      .param .b64 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i48,
+; CHECK:      call.uni (retval0), test_i48,
 ; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
 ; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -776,8 +738,7 @@ define i48 @test_i48(i48 %a) {
 ; CHECK:      .param .b64 param0;
 ; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
 ; CHECK:      .param .b64 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i51,
+; CHECK:      call.uni (retval0), test_i51,
 ; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
 ; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -795,8 +756,7 @@ define i51 @test_i51(i51 %a) {
 ; CHECK:      .param .b64 param0;
 ; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
 ; CHECK:      .param .b64 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i56,
+; CHECK:      call.uni (retval0), test_i56,
 ; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
 ; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -812,8 +772,7 @@ define i56 @test_i56(i56 %a) {
 ; CHECK:      .param .b64 param0;
 ; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
 ; CHECK:      .param .b64 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i57,
+; CHECK:      call.uni (retval0), test_i57,
 ; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
 ; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -829,8 +788,7 @@ define i57 @test_i57(i57 %a) {
 ; CHECK:      .param .b64 param0;
 ; CHECK:      st.param.b64    [param0], [[E]];
 ; CHECK:      .param .b64 retval0;
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_i64,
+; CHECK:      call.uni (retval0), test_i64,
 ; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0];
 ; CHECK:      st.param.b64    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -848,8 +806,7 @@ define i64 @test_i64(i64 %a) {
 ; CHECK:      st.param.v2.b64  [param0], {[[E0]], [[E1]]};
 ; CHECK:      st.param.b64     [param0+16], [[E2]];
 ; CHECK:      .param .align 32 .b8 retval0[32];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v3i64,
+; CHECK:      call.uni (retval0), test_v3i64,
 ; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
 ; CHECK:      ld.param.b64     [[RE2:%rd[0-9]+]], [retval0+16];
 ; CHECK-DAG:  st.param.v2.b64  [func_retval0], {[[RE0]], [[RE1]]};
@@ -872,8 +829,7 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) {
 ; CHECK:      st.param.v2.b64  [param0], {[[E0]], [[E1]]};
 ; CHECK:      st.param.v2.b64  [param0+16], {[[E2]], [[E3]]};
 ; CHECK:      .param .align 32 .b8 retval0[32];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_v4i64,
+; CHECK:      call.uni (retval0), test_v4i64,
 ; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
 ; CHECK:      ld.param.v2.b64  {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
 ; CHECK-DAG:  st.param.v2.b64  [func_retval0+16], {[[RE2]], [[RE3]]};
@@ -893,8 +849,7 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) {
 ; CHECK:      .param .align 1 .b8 param0[1];
 ; CHECK:      st.param.b8    [param0], [[A]]
 ; CHECK:      .param .align 1 .b8 retval0[1];
-; CHECK:      call.uni
-; CHECK-NEXT: test_s_i1,
+; CHECK:      call.uni (retval0), test_s_i1,
 ; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0];
 ; CHECK:      st.param.b8    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -910,8 +865,7 @@ define %s_i1 @test_s_i1(%s_i1 %a) {
 ; CHECK:      .param .align 1 .b8 param0[1];
 ; CHECK:      st.param.b8    [param0], [[A]]
 ; CHECK:      .param .align 1 .b8 retval0[1];
-; CHECK:      call.uni
-; CHECK-NEXT: test_s_i8,
+; CHECK:      call.uni (retval0), test_s_i8,
 ; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0];
 ; CHECK:      st.param.b8    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -927,8 +881,7 @@ define %s_i8 @test_s_i8(%s_i8 %a) {
 ; CHECK:      .param .align 2 .b8 param0[2];
 ; CHECK:      st.param.b16    [param0], [[A]]
 ; CHECK:      .param .align 2 .b8 retval0[2];
-; CHECK:      call.uni
-; CHECK-NEXT: test_s_i16,
+; CHECK:      call.uni (retval0), test_s_i16,
 ; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
 ; CHECK:      st.param.b16    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -944,8 +897,7 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
 ; CHECK:      .param .align 2 .b8 param0[2];
 ; CHECK:      st.param.b16    [param0], [[A]]
 ; CHECK:      .param .align 2 .b8 retval0[2];
-; CHECK:      call.uni
-; CHECK-NEXT: test_s_f16,
+; CHECK:      call.uni (retval0), test_s_f16,
 ; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
 ; CHECK:      st.param.b16    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -961,8 +913,7 @@ define %s_f16 @test_s_f16(%s_f16 %a) {
 ; CHECK:      .param .align 4 .b8 param0[4]
 ; CHECK:      st.param.b32    [param0], [[E]];
 ; CHECK:      .param .align 4 .b8 retval0[4];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_s_i32,
+; CHECK:      call.uni (retval0), test_s_i32,
 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -978,8 +929,7 @@ define %s_i32 @test_s_i32(%s_i32 %a) {
 ; CHECK:      .param .align 4 .b8 param0[4]
 ; CHECK:      st.param.b32    [param0], [[E]];
 ; CHECK:      .param .align 4 .b8 retval0[4];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_s_f32,
+; CHECK:      call.uni (retval0), test_s_f32,
 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
 ; CHECK:      st.param.b32    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -995,8 +945,7 @@ define %s_f32 @test_s_f32(%s_f32 %a) {
 ; CHECK:      .param .align 8 .b8 param0[8];
 ; CHECK:      st.param.b64    [param0], [[E]];
 ; CHECK:      .param .align 8 .b8 retval0[8];
-; CHECK:      call.uni (retval0),
-; CHECK-NEXT: test_s_i64,
+; CHECK:      call.uni (retval0), test_s_i64,
 ; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0];
 ; CHECK:      st.param.b64    [func_retval0], [[R]];
 ; CHECK-NEXT: ret;
@@ -1021,8 +970,7 @@ define %s_i64 @test_s_i64(%s_i64 %a) {
 ; CHECK-DAG:    st.param.b32    [param0+12], [[E3]];
 ; CHECK-DAG:    st.param.b64    [param0+16], [[E4]];
 ; CHECK:        .param .align 8 .b8 retval0[24];
-; CHECK:        call.uni (retval0),
-; CHECK-NEXT:   test_s_i32f32,
+; CHECK:        call.uni (retval0), test_s_i32f32,
 ; CHECK-DAG:    ld.param.b32    [[RE0:%r[0-9]+]], [retval0];
 ; CHECK-DAG:    ld.param.b32    [[RE1:%r[0-9]+]], [retval0+4];
 ; CHECK-DAG:    ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
@@ -1051,8 +999,7 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
 ; CHECK:        st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
 ; CHECK:        st.param.b64    [param0+16], [[E4]];
 ; CHECK:        .param .align 8 .b8 retval0[24];
-; CHECK:        call.uni (retval0),
-; CHECK-NEXT:   test_s_i32x4,
+; CHECK:        call.uni (retval0), test_s_i32x4,
 ; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK:        ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
 ; CHECK:        ld.param.b64    [[RE4:%rd[0-9]+]], [retval0+16];
@@ -1081,8 +1028,7 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
 ; CHECK:        st.param.b32    [param0+16], [[E4]];
 ; CHECK:        st.param.b64    [param0+24], [[E5]];
 ; CHECK:        .param .align 8 .b8 retval0[32];
-; CHECK:        call.uni (retval0),
-; CHECK:        test_s_i1i32x4,
+; CHECK:        call.uni (retval0), test_s_i1i32x4,
 ; CHECK:        (
 ; CHECK:        param0
 ; CHECK:        );
@@ -1160,8 +1106,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
 ; CHECK-DAG:        st.param.b8     [param0+23],
 ; CHECK-DAG:        st.param.b8     [param0+24],
 ; CHECK:            .param .align 1 .b8 retval0[25];
-; CHECK:            call.uni (retval0),
-; CHECK-NEXT:       test_s_i1i32x4p,
+; CHECK:            call.uni (retval0), test_s_i1i32x4p,
 ; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0];
 ; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
 ; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
@@ -1237,8 +1182,7 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
 ; CHECK:        st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
 ; CHECK:        st.param.b32    [param0+64], [[E15]];
 ; CHECK:        .param .align 16 .b8 retval0[80];
-; CHECK:        call.uni (retval0),
-; CHECK:        test_s_crossfield,
+; CHECK:        call.uni (retval0), test_s_crossfield,
 ; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK:        ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
 ; CHECK:        ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll
index 22a648c7a9786..f490c5f73d425 100644
--- a/llvm/test/CodeGen/NVPTX/param-overalign.ll
+++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=nvptx | FileCheck %s
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
 
@@ -18,27 +19,23 @@ target triple = "nvptx64-nvidia-cuda"
 ; CHECK-NEXT: ;
 
 define float @caller_md(float %a, float %b) {
-; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) caller_md(
-; CHECK-NEXT:         .param .b32 caller_md_param_0,
-; CHECK-NEXT:         .param .b32 caller_md_param_1
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK:         ld.param.b32 %r1, [caller_md_param_0];
+; CHECK-LABEL: caller_md(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [caller_md_param_0];
 ; CHECK-NEXT:    ld.param.b32 %r2, [caller_md_param_1];
-; CHECK-NEXT:    {
+; CHECK-NEXT:    { // callseq 0, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {%r1, %r2};
 ; CHECK-NEXT:    .param .b32 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    callee_md,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), callee_md, (param0);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
-; CHECK-NEXT:    }
+; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NEXT:    ret;
+
   %s1 = insertvalue %struct.float2 poison, float %a, 0
   %s2 = insertvalue %struct.float2 %s1, float %b, 1
   %r = call float @callee_md(%struct.float2 %s2)
@@ -46,15 +43,16 @@ define float @caller_md(float %a, float %b) {
 }
 
 define float @callee_md(%struct.float2 alignstack(8) %a) {
-; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) callee_md(
-; CHECK-NEXT:         .param .align 8 .b8 callee_md_param_0[8]
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK:         ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
+; CHECK-LABEL: callee_md(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b32 {%r1, %r2}, [callee_md_param_0];
 ; CHECK-NEXT:    add.rn.f32 %r3, %r1, %r2;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NEXT:    ret;
+
   %v0 = extractvalue %struct.float2 %a, 0
   %v1 = extractvalue %struct.float2 %a, 1
   %2 = fadd float %v0, %v1
@@ -62,27 +60,23 @@ define float @callee_md(%struct.float2 alignstack(8) %a) {
 }
 
 define float @caller(float %a, float %b) {
-; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) caller(
-; CHECK-NEXT:         .param .b32 caller_param_0,
-; CHECK-NEXT:         .param .b32 caller_param_1
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK:         ld.param.b32 %r1, [caller_param_0];
+; CHECK-LABEL: caller(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [caller_param_0];
 ; CHECK-NEXT:    ld.param.b32 %r2, [caller_param_1];
-; CHECK-NEXT:    {
+; CHECK-NEXT:    { // callseq 1, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {%r1, %r2};
 ; CHECK-NEXT:    .param .b32 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    callee,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), callee, (param0);
 ; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
-; CHECK-NEXT:    }
+; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NEXT:    ret;
+
   %s1 = insertvalue %struct.float2 poison, float %a, 0
   %s2 = insertvalue %struct.float2 %s1, float %b, 1
   %r = call float @callee(%struct.float2 %s2)
@@ -90,15 +84,16 @@ define float @caller(float %a, float %b) {
 }
 
 define float @callee(%struct.float2 alignstack(8) %a ) {
-; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) callee(
-; CHECK-NEXT:         .param .align 8 .b8 callee_param_0[8]
-; CHECK-NEXT: )
-; CHECK-NEXT: {
-
-; CHECK:         ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
+; CHECK-LABEL: callee(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.v2.b32 {%r1, %r2}, [callee_param_0];
 ; CHECK-NEXT:    add.rn.f32 %r3, %r1, %r2;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-NEXT:    ret;
+
   %v0 = extractvalue %struct.float2 %a, 0
   %v1 = extractvalue %struct.float2 %a, 1
   %2 = fadd float %v0, %v1
@@ -106,9 +101,15 @@ define float @callee(%struct.float2 alignstack(8) %a ) {
 }
 
 define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) {
-; CHECK-LABEL: .visible .func  (.param .align 8 .b8 func_retval0[8]) aligned_return(
-; CHECK-NEXT:         .param .align 4 .b8 aligned_return_param_0[8]
-; CHECK-NEXT: )
-; CHECK-NEXT: {
+; CHECK-LABEL: aligned_return(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [aligned_return_param_0+4];
+; CHECK-NEXT:    ld.param.b32 %r2, [aligned_return_param_0];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0+4], %r1;
+; CHECK-NEXT:    ret;
   ret %struct.float2 %a
 }
diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
index abb1aff867754..892e49a5fe82a 100644
--- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
+++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
@@ -86,11 +86,7 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       .param .b32 param0;
   ; CHECK:       st.param.b32 [param0], {{%r[0-9]+}};
   ; CHECK:       .param .align 16 .b8 retval0[4];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x1,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x1, (param0);
   ; CHECK:       ld.param.b32 {{%r[0-9]+}}, [retval0];
   %1 = load i32, ptr %in, align 4
   %call = tail call fastcc [1 x i32] @callee_St4x1(i32 %1) #2
@@ -118,11 +114,7 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       .param .align 16 .b8 param0[8];
   ; CHECK:       st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       .param .align 16 .b8 retval0[8];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x2,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x2, (param0);
   ; CHECK:       ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
   %agg.tmp = alloca %struct.St4x2, align 8
   %1 = load i64, ptr %in, align 4
@@ -160,11 +152,7 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       st.param.b32    [param0+8], {{%r[0-9]+}};
   ; CHECK:       .param .align 16 .b8 retval0[12];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x3,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x3, (param0);
   ; CHECK:       ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
   ; CHECK:       ld.param.b32    {{%r[0-9]+}},  [retval0+8];
   %call = tail call fastcc [3 x i32] @callee_St4x3(ptr noundef nonnull byval(%struct.St4x3) align 4 %in) #2
@@ -207,11 +195,7 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       .param .align 16 .b8 param0[16];
   ; CHECK:       st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       .param .align 16 .b8 retval0[16];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x4,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x4, (param0);
   ; CHECK:       ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
   %call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2
   %.fca.0.extract = extractvalue [4 x i32] %call, 0
@@ -258,11 +242,7 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       st.param.v4.b32 [param0],  {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       st.param.b32    [param0+16], {{%r[0-9]+}};
   ; CHECK:       .param .align 16 .b8 retval0[20];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x5,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x5, (param0);
   ; CHECK:       ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
   ; CHECK:       ld.param.b32    {{%r[0-9]+}},  [retval0+16];
   %call = tail call fastcc [5 x i32] @callee_St4x5(ptr noundef nonnull byval(%struct.St4x5) align 4 %in) #2
@@ -318,11 +298,7 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       st.param.v4.b32 [param0],  {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       .param .align 16 .b8 retval0[24];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x6,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x6, (param0);
   ; CHECK:       ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
   ; CHECK:       ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
   %call = tail call fastcc [6 x i32] @callee_St4x6(ptr noundef nonnull byval(%struct.St4x6) align 4 %in) #2
@@ -385,11 +361,7 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       st.param.b32    [param0+24], {{%r[0-9]+}};
   ; CHECK:       .param .align 16 .b8 retval0[28];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x7,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x7, (param0);
   ; CHECK:       ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
   ; CHECK:       ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
   ; CHECK:       ld.param.b32    {{%r[0-9]+}}, [retval0+24];
@@ -460,11 +432,7 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       st.param.v4.b32 [param0],  {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
   ; CHECK:       .param .align 16 .b8 retval0[32];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St4x8,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St4x8, (param0);
   ; CHECK:       ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
   ; CHECK:       ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
   %call = tail call fastcc [8 x i32] @callee_St4x8(ptr noundef nonnull byval(%struct.St4x8) align 4 %in) #2
@@ -537,11 +505,7 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       .param .b64 param0;
   ; CHECK:       st.param.b64 [param0], {{%rd[0-9]+}};
   ; CHECK:       .param .align 16 .b8 retval0[8];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St8x1,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St8x1, (param0);
   ; CHECK:       ld.param.b64 {{%rd[0-9]+}}, [retval0];
   %1 = load i64, ptr %in, align 8
   %call = tail call fastcc [1 x i64] @callee_St8x1(i64 %1) #2
@@ -569,11 +533,7 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       .param .align 16 .b8 param0[16];
   ; CHECK:       st.param.v2.b64 [param0],  {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
   ; CHECK:       .param .align 16 .b8 retval0[16];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St8x2,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St8x2, (param0);
   ; CHECK:       ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
   %call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2
   %.fca.0.extract = extractvalue [2 x i64] %call, 0
@@ -608,11 +568,7 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       st.param.v2.b64 [param0],  {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
   ; CHECK:       st.param.b64    [param0+16], {{%rd[0-9]+}};
   ; CHECK:       .param .align 16 .b8 retval0[24];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St8x3,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St8x3, (param0);
   ; CHECK:       ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
   ; CHECK:       ld.param.b64    {{%rd[0-9]+}}, [retval0+16];
   %call = tail call fastcc [3 x i64] @callee_St8x3(ptr noundef nonnull byval(%struct.St8x3) align 8 %in) #2
@@ -656,11 +612,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
   ; CHECK:       st.param.v2.b64 [param0],  {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
   ; CHECK:       st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
   ; CHECK:       .param .align 16 .b8 retval0[32];
-  ; CHECK:       call.uni (retval0),
-  ; CHECK-NEXT:  callee_St8x4,
-  ; CHECK-NEXT:  (
-  ; CHECK-NEXT:  param0
-  ; CHECK-NEXT:  );
+  ; CHECK:       call.uni (retval0), callee_St8x4, (param0);
   ; CHECK:       ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
   ; CHECK:       ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16];
   %call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index b165b4cb4b262..f0813609268e9 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -131,11 +131,7 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
 ; CHECK-NEXT:    { // callseq 0, 0
 ; CHECK-NEXT:    .param .b64 param0;
 ; CHECK-NEXT:    st.param.b64 [param0], %rd3;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    use,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni use, (param0);
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
@@ -164,11 +160,7 @@ define i64 @test_negative_use_shl(i64 %x, i32 %y) {
 ; CHECK-NEXT:    { // callseq 1, 0
 ; CHECK-NEXT:    .param .b64 param0;
 ; CHECK-NEXT:    st.param.b64 [param0], %rd2;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    use,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni use, (param0);
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4;
 ; CHECK-NEXT:    ret;
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index bdab9958fe2b2..50d3e8049a947 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -28,11 +28,7 @@ define void @st_param_i8_i16() {
 ; CHECK-NEXT:    .param .align 2 .b8 param0[4];
 ; CHECK-NEXT:    st.param.b8 [param0], 1;
 ; CHECK-NEXT:    st.param.b16 [param0+2], 2;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_i8_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_i8_i16, (param0);
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    ret;
   call void @call_i8_i16(%struct.A { i8 1, i16 2 })
@@ -48,11 +44,7 @@ define void @st_param_i32() {
 ; CHECK-NEXT:    { // callseq 1, 0
 ; CHECK-NEXT:    .param .b32 param0;
 ; CHECK-NEXT:    st.param.b32 [param0], 3;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_i32, (param0);
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    ret;
   call void @call_i32(i32 3)
@@ -68,11 +60,7 @@ define void @st_param_i64() {
 ; CHECK-NEXT:    { // callseq 2, 0
 ; CHECK-NEXT:    .param .b64 param0;
 ; CHECK-NEXT:    st.param.b64 [param0], 4;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_i64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_i64, (param0);
 ; CHECK-NEXT:    } // callseq 2
 ; CHECK-NEXT:    ret;
   call void @call_i64(i64 4)
@@ -88,11 +76,7 @@ define void @st_param_f32() {
 ; CHECK-NEXT:    { // callseq 3, 0
 ; CHECK-NEXT:    .param .b32 param0;
 ; CHECK-NEXT:    st.param.b32 [param0], 0f40A00000;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_f32, (param0);
 ; CHECK-NEXT:    } // callseq 3
 ; CHECK-NEXT:    ret;
   call void @call_f32(float 5.0)
@@ -108,11 +92,7 @@ define void @st_param_f64() {
 ; CHECK-NEXT:    { // callseq 4, 0
 ; CHECK-NEXT:    .param .b64 param0;
 ; CHECK-NEXT:    st.param.b64 [param0], 0d4018000000000000;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_f64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_f64, (param0);
 ; CHECK-NEXT:    } // callseq 4
 ; CHECK-NEXT:    ret;
   call void @call_f64(double 6.0)
@@ -134,11 +114,7 @@ define void @st_param_v2_i8_ii() {
 ; CHECK-NEXT:    { // callseq 5, 0
 ; CHECK-NEXT:    .param .align 2 .b8 param0[2];
 ; CHECK-NEXT:    st.param.v2.b8 [param0], {1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i8, (param0);
 ; CHECK-NEXT:    } // callseq 5
 ; CHECK-NEXT:    ret;
   call void @call_v2_i8(%struct.char2 { i8 1, i8 2 })
@@ -154,11 +130,7 @@ define void @st_param_v2_i8_ir(i8 %val) {
 ; CHECK-NEXT:    { // callseq 6, 0
 ; CHECK-NEXT:    .param .align 2 .b8 param0[2];
 ; CHECK-NEXT:    st.param.v2.b8 [param0], {1, %rs1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i8, (param0);
 ; CHECK-NEXT:    } // callseq 6
 ; CHECK-NEXT:    ret;
   %struct.ir0 = insertvalue %struct.char2 poison, i8 1, 0
@@ -176,11 +148,7 @@ define void @st_param_v2_i8_ri(i8 %val) {
 ; CHECK-NEXT:    { // callseq 7, 0
 ; CHECK-NEXT:    .param .align 2 .b8 param0[2];
 ; CHECK-NEXT:    st.param.v2.b8 [param0], {%rs1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i8, (param0);
 ; CHECK-NEXT:    } // callseq 7
 ; CHECK-NEXT:    ret;
   %struct.ri0 = insertvalue %struct.char2 poison, i8 %val, 0
@@ -198,11 +166,7 @@ define void @st_param_v2_i16_ii() {
 ; CHECK-NEXT:    { // callseq 8, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v2.b16 [param0], {1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i16, (param0);
 ; CHECK-NEXT:    } // callseq 8
 ; CHECK-NEXT:    ret;
   call void @call_v2_i16(%struct.short2 { i16 1, i16 2 })
@@ -218,11 +182,7 @@ define void @st_param_v2_i16_ir(i16 %val) {
 ; CHECK-NEXT:    { // callseq 9, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v2.b16 [param0], {1, %rs1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i16, (param0);
 ; CHECK-NEXT:    } // callseq 9
 ; CHECK-NEXT:    ret;
   %struct.ir0 = insertvalue %struct.short2 poison, i16 1, 0
@@ -240,11 +200,7 @@ define void @st_param_v2_i16_ri(i16 %val) {
 ; CHECK-NEXT:    { // callseq 10, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v2.b16 [param0], {%rs1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i16, (param0);
 ; CHECK-NEXT:    } // callseq 10
 ; CHECK-NEXT:    ret;
   %struct.ri0 = insertvalue %struct.short2 poison, i16 %val, 0
@@ -262,11 +218,7 @@ define void @st_param_v2_i32_ii() {
 ; CHECK-NEXT:    { // callseq 11, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i32, (param0);
 ; CHECK-NEXT:    } // callseq 11
 ; CHECK-NEXT:    ret;
   call void @call_v2_i32(%struct.int2 { i32 1, i32 2 })
@@ -282,11 +234,7 @@ define void @st_param_v2_i32_ir(i32 %val) {
 ; CHECK-NEXT:    { // callseq 12, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {1, %r1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i32, (param0);
 ; CHECK-NEXT:    } // callseq 12
 ; CHECK-NEXT:    ret;
   %struct.ir0 = insertvalue %struct.int2 poison, i32 1, 0
@@ -304,11 +252,7 @@ define void @st_param_v2_i32_ri(i32 %val) {
 ; CHECK-NEXT:    { // callseq 13, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {%r1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i32, (param0);
 ; CHECK-NEXT:    } // callseq 13
 ; CHECK-NEXT:    ret;
   %struct.ri0 = insertvalue %struct.int2 poison, i32 %val, 0
@@ -326,11 +270,7 @@ define void @st_param_v2_i64_ii() {
 ; CHECK-NEXT:    { // callseq 14, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i64, (param0);
 ; CHECK-NEXT:    } // callseq 14
 ; CHECK-NEXT:    ret;
   call void @call_v2_i64(%struct.longlong2 { i64 1, i64 2 })
@@ -346,11 +286,7 @@ define void @st_param_v2_i64_ir(i64 %val) {
 ; CHECK-NEXT:    { // callseq 15, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {1, %rd1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i64, (param0);
 ; CHECK-NEXT:    } // callseq 15
 ; CHECK-NEXT:    ret;
   %struct.ir0 = insertvalue %struct.longlong2 poison, i64 1, 0
@@ -368,11 +304,7 @@ define void @st_param_v2_i64_ri(i64 %val) {
 ; CHECK-NEXT:    { // callseq 16, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {%rd1, 2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_i64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_i64, (param0);
 ; CHECK-NEXT:    } // callseq 16
 ; CHECK-NEXT:    ret;
   %struct.ri0 = insertvalue %struct.longlong2 poison, i64 %val, 0
@@ -390,11 +322,7 @@ define void @st_param_v2_f32_ii(float %val) {
 ; CHECK-NEXT:    { // callseq 17, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {0f3F800000, 0f40000000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_f32, (param0);
 ; CHECK-NEXT:    } // callseq 17
 ; CHECK-NEXT:    ret;
   call void @call_v2_f32(%struct.float2 { float 1.0, float 2.0 })
@@ -410,11 +338,7 @@ define void @st_param_v2_f32_ir(float %val) {
 ; CHECK-NEXT:    { // callseq 18, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {0f3F800000, %r1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_f32, (param0);
 ; CHECK-NEXT:    } // callseq 18
 ; CHECK-NEXT:    ret;
   %struct.ir0 = insertvalue %struct.float2 poison, float 1.0, 0
@@ -432,11 +356,7 @@ define void @st_param_v2_f32_ri(float %val) {
 ; CHECK-NEXT:    { // callseq 19, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v2.b32 [param0], {%r1, 0f40000000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_f32, (param0);
 ; CHECK-NEXT:    } // callseq 19
 ; CHECK-NEXT:    ret;
   %struct.ri0 = insertvalue %struct.float2 poison, float %val, 0
@@ -454,11 +374,7 @@ define void @st_param_v2_f64_ii(double %val) {
 ; CHECK-NEXT:    { // callseq 20, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {0d3FF0000000000000, 0d4000000000000000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_f64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_f64, (param0);
 ; CHECK-NEXT:    } // callseq 20
 ; CHECK-NEXT:    ret;
   call void @call_v2_f64(%struct.double2 { double 1.0, double 2.0 })
@@ -474,11 +390,7 @@ define void @st_param_v2_f64_ir(double %val) {
 ; CHECK-NEXT:    { // callseq 21, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {0d3FF0000000000000, %rd1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_f64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_f64, (param0);
 ; CHECK-NEXT:    } // callseq 21
 ; CHECK-NEXT:    ret;
   %struct.ir0 = insertvalue %struct.double2 poison, double 1.0, 0
@@ -496,11 +408,7 @@ define void @st_param_v2_f64_ri(double %val) {
 ; CHECK-NEXT:    { // callseq 22, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {%rd1, 0d4000000000000000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v2_f64,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v2_f64, (param0);
 ; CHECK-NEXT:    } // callseq 22
 ; CHECK-NEXT:    ret;
   %struct.ri0 = insertvalue %struct.double2 poison, double %val, 0
@@ -525,11 +433,7 @@ define void @st_param_v4_i8_iiii() {
 ; CHECK-NEXT:    { // callseq 23, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, 2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 23
 ; CHECK-NEXT:    ret;
   call void @call_v4_i8(%struct.char4 { i8 1, i8 2, i8 3, i8 4 })
@@ -547,11 +451,7 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
 ; CHECK-NEXT:    { // callseq 24, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, %rs1, %rs2, %rs3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 24
 ; CHECK-NEXT:    ret;
   %struct.irrr0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -573,11 +473,7 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
 ; CHECK-NEXT:    { // callseq 25, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {%rs1, 2, %rs2, %rs3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 25
 ; CHECK-NEXT:    ret;
   %struct.rirr0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -599,11 +495,7 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
 ; CHECK-NEXT:    { // callseq 26, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {%rs1, %rs2, 3, %rs3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 26
 ; CHECK-NEXT:    ret;
   %struct.rrir0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -625,11 +517,7 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
 ; CHECK-NEXT:    { // callseq 27, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {%rs1, %rs2, %rs3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 27
 ; CHECK-NEXT:    ret;
   %struct.rrri0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -650,11 +538,7 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
 ; CHECK-NEXT:    { // callseq 28, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, 2, %rs1, %rs2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 28
 ; CHECK-NEXT:    ret;
   %struct.iirr0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -675,11 +559,7 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
 ; CHECK-NEXT:    { // callseq 29, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, %rs1, 3, %rs2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 29
 ; CHECK-NEXT:    ret;
   %struct.irir0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -700,11 +580,7 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
 ; CHECK-NEXT:    { // callseq 30, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, %rs1, %rs2, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 30
 ; CHECK-NEXT:    ret;
   %struct.irri0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -725,11 +601,7 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
 ; CHECK-NEXT:    { // callseq 31, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {%rs1, 2, 3, %rs2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 31
 ; CHECK-NEXT:    ret;
   %struct.riir0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -750,11 +622,7 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
 ; CHECK-NEXT:    { // callseq 32, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {%rs1, 2, %rs2, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 32
 ; CHECK-NEXT:    ret;
   %struct.riri0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -775,11 +643,7 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
 ; CHECK-NEXT:    { // callseq 33, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {%rs1, %rs2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 33
 ; CHECK-NEXT:    ret;
   %struct.rrii0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -799,11 +663,7 @@ define void @st_param_v4_i8_iiir(i8 %d) {
 ; CHECK-NEXT:    { // callseq 34, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, 2, 3, %rs1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 34
 ; CHECK-NEXT:    ret;
   %struct.iiir0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -823,11 +683,7 @@ define void @st_param_v4_i8_iiri(i8 %c) {
 ; CHECK-NEXT:    { // callseq 35, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, 2, %rs1, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 35
 ; CHECK-NEXT:    ret;
   %struct.iiri0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -847,11 +703,7 @@ define void @st_param_v4_i8_irii(i8 %b) {
 ; CHECK-NEXT:    { // callseq 36, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {1, %rs1, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 36
 ; CHECK-NEXT:    ret;
   %struct.irii0 = insertvalue %struct.char4 poison, i8 1, 0
@@ -871,11 +723,7 @@ define void @st_param_v4_i8_riii(i8 %a) {
 ; CHECK-NEXT:    { // callseq 37, 0
 ; CHECK-NEXT:    .param .align 4 .b8 param0[4];
 ; CHECK-NEXT:    st.param.v4.b8 [param0], {%rs1, 2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i8,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i8, (param0);
 ; CHECK-NEXT:    } // callseq 37
 ; CHECK-NEXT:    ret;
   %struct.riii0 = insertvalue %struct.char4 poison, i8 %a, 0
@@ -895,11 +743,7 @@ define void @st_param_v4_i16_iiii() {
 ; CHECK-NEXT:    { // callseq 38, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, 2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 38
 ; CHECK-NEXT:    ret;
   call void @call_v4_i16(%struct.short4 { i16 1, i16 2, i16 3, i16 4 })
@@ -917,11 +761,7 @@ define void @st_param_v4_i16_irrr(i16 %b, i16 %c, i16 %d) {
 ; CHECK-NEXT:    { // callseq 39, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, %rs1, %rs2, %rs3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 39
 ; CHECK-NEXT:    ret;
   %struct.irrr0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -943,11 +783,7 @@ define void @st_param_v4_i16_rirr(i16 %a, i16 %c, i16 %d) {
 ; CHECK-NEXT:    { // callseq 40, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {%rs1, 2, %rs2, %rs3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 40
 ; CHECK-NEXT:    ret;
   %struct.rirr0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -969,11 +805,7 @@ define void @st_param_v4_i16_rrir(i16 %a, i16 %b, i16 %d) {
 ; CHECK-NEXT:    { // callseq 41, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {%rs1, %rs2, 3, %rs3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 41
 ; CHECK-NEXT:    ret;
   %struct.rrir0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -995,11 +827,7 @@ define void @st_param_v4_i16_rrri(i16 %a, i16 %b, i16 %c) {
 ; CHECK-NEXT:    { // callseq 42, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {%rs1, %rs2, %rs3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 42
 ; CHECK-NEXT:    ret;
   %struct.rrri0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1020,11 +848,7 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) {
 ; CHECK-NEXT:    { // callseq 43, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, 2, %rs1, %rs2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 43
 ; CHECK-NEXT:    ret;
   %struct.iirr0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1045,11 +869,7 @@ define void @st_param_v4_i16_irir(i16 %b, i16 %d) {
 ; CHECK-NEXT:    { // callseq 44, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, %rs1, 3, %rs2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 44
 ; CHECK-NEXT:    ret;
   %struct.irir0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1070,11 +890,7 @@ define void @st_param_v4_i16_irri(i16 %b, i16 %c) {
 ; CHECK-NEXT:    { // callseq 45, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, %rs1, %rs2, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 45
 ; CHECK-NEXT:    ret;
   %struct.irri0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1095,11 +911,7 @@ define void @st_param_v4_i16_riir(i16 %a, i16 %d) {
 ; CHECK-NEXT:    { // callseq 46, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {%rs1, 2, 3, %rs2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 46
 ; CHECK-NEXT:    ret;
   %struct.riir0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1120,11 +932,7 @@ define void @st_param_v4_i16_riri(i16 %a, i16 %c) {
 ; CHECK-NEXT:    { // callseq 47, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {%rs1, 2, %rs2, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 47
 ; CHECK-NEXT:    ret;
   %struct.riri0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1145,11 +953,7 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
 ; CHECK-NEXT:    { // callseq 48, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 48
 ; CHECK-NEXT:    ret;
   %struct.rrii0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1169,11 +973,7 @@ define void @st_param_v4_i16_iiir(i16 %d) {
 ; CHECK-NEXT:    { // callseq 49, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, 2, 3, %rs1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 49
 ; CHECK-NEXT:    ret;
   %struct.iiir0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1193,11 +993,7 @@ define void @st_param_v4_i16_iiri(i16 %c) {
 ; CHECK-NEXT:    { // callseq 50, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, 2, %rs1, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 50
 ; CHECK-NEXT:    ret;
   %struct.iiri0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1217,11 +1013,7 @@ define void @st_param_v4_i16_irii(i16 %b) {
 ; CHECK-NEXT:    { // callseq 51, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {1, %rs1, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 51
 ; CHECK-NEXT:    ret;
   %struct.irii0 = insertvalue %struct.short4 poison, i16 1, 0
@@ -1241,11 +1033,7 @@ define void @st_param_v4_i16_riii(i16 %a) {
 ; CHECK-NEXT:    { // callseq 52, 0
 ; CHECK-NEXT:    .param .align 8 .b8 param0[8];
 ; CHECK-NEXT:    st.param.v4.b16 [param0], {%rs1, 2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i16,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i16, (param0);
 ; CHECK-NEXT:    } // callseq 52
 ; CHECK-NEXT:    ret;
   %struct.riii0 = insertvalue %struct.short4 poison, i16 %a, 0
@@ -1265,11 +1053,7 @@ define void @st_param_v4_i32_iiii() {
 ; CHECK-NEXT:    { // callseq 53, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, 2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 53
 ; CHECK-NEXT:    ret;
   call void @call_v4_i32(%struct.int4 { i32 1, i32 2, i32 3, i32 4 })
@@ -1287,11 +1071,7 @@ define void @st_param_v4_i32_irrr(i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT:    { // callseq 54, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, %r1, %r2, %r3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 54
 ; CHECK-NEXT:    ret;
   %struct.irrr0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1313,11 +1093,7 @@ define void @st_param_v4_i32_rirr(i32 %a, i32 %c, i32 %d) {
 ; CHECK-NEXT:    { // callseq 55, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 2, %r2, %r3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 55
 ; CHECK-NEXT:    ret;
   %struct.rirr0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1339,11 +1115,7 @@ define void @st_param_v4_i32_rrir(i32 %a, i32 %b, i32 %d) {
 ; CHECK-NEXT:    { // callseq 56, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, %r2, 3, %r3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 56
 ; CHECK-NEXT:    ret;
   %struct.rrir0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1365,11 +1137,7 @@ define void @st_param_v4_i32_rrri(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    { // callseq 57, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, %r2, %r3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 57
 ; CHECK-NEXT:    ret;
   %struct.rrri0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1390,11 +1158,7 @@ define void @st_param_v4_i32_iirr(i32 %c, i32 %d) {
 ; CHECK-NEXT:    { // callseq 58, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, 2, %r1, %r2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 58
 ; CHECK-NEXT:    ret;
   %struct.iirr0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1415,11 +1179,7 @@ define void @st_param_v4_i32_irir(i32 %b, i32 %d) {
 ; CHECK-NEXT:    { // callseq 59, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, %r1, 3, %r2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 59
 ; CHECK-NEXT:    ret;
   %struct.irir0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1440,11 +1200,7 @@ define void @st_param_v4_i32_irri(i32 %b, i32 %c) {
 ; CHECK-NEXT:    { // callseq 60, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, %r1, %r2, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 60
 ; CHECK-NEXT:    ret;
   %struct.irri0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1465,11 +1221,7 @@ define void @st_param_v4_i32_riir(i32 %a, i32 %d) {
 ; CHECK-NEXT:    { // callseq 61, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 2, 3, %r2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 61
 ; CHECK-NEXT:    ret;
   %struct.riir0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1490,11 +1242,7 @@ define void @st_param_v4_i32_riri(i32 %a, i32 %c) {
 ; CHECK-NEXT:    { // callseq 62, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 2, %r2, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 62
 ; CHECK-NEXT:    ret;
   %struct.riri0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1515,11 +1263,7 @@ define void @st_param_v4_i32_rrii(i32 %a, i32 %b) {
 ; CHECK-NEXT:    { // callseq 63, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, %r2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 63
 ; CHECK-NEXT:    ret;
   %struct.rrii0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1539,11 +1283,7 @@ define void @st_param_v4_i32_iiir(i32 %d) {
 ; CHECK-NEXT:    { // callseq 64, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, 2, 3, %r1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 64
 ; CHECK-NEXT:    ret;
   %struct.iiir0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1563,11 +1303,7 @@ define void @st_param_v4_i32_iiri(i32 %c) {
 ; CHECK-NEXT:    { // callseq 65, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, 2, %r1, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 65
 ; CHECK-NEXT:    ret;
   %struct.iiri0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1587,11 +1323,7 @@ define void @st_param_v4_i32_irii(i32 %b) {
 ; CHECK-NEXT:    { // callseq 66, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {1, %r1, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 66
 ; CHECK-NEXT:    ret;
   %struct.irii0 = insertvalue %struct.int4 poison, i32 1, 0
@@ -1611,11 +1343,7 @@ define void @st_param_v4_i32_riii(i32 %a) {
 ; CHECK-NEXT:    { // callseq 67, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 2, 3, 4};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_i32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_i32, (param0);
 ; CHECK-NEXT:    } // callseq 67
 ; CHECK-NEXT:    ret;
   %struct.riii0 = insertvalue %struct.int4 poison, i32 %a, 0
@@ -1635,11 +1363,7 @@ define void @st_param_v4_f32_iiii() {
 ; CHECK-NEXT:    { // callseq 68, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, 0f40000000, 0f40400000, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 68
 ; CHECK-NEXT:    ret;
   call void @call_v4_f32(%struct.float4 { float 1.0, float 2.0, float 3.0, float 4.0 })
@@ -1657,11 +1381,7 @@ define void @st_param_v4_f32_irrr(float %b, float %c, float %d) {
 ; CHECK-NEXT:    { // callseq 69, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, %r1, %r2, %r3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 69
 ; CHECK-NEXT:    ret;
   %struct.irrr0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1683,11 +1403,7 @@ define void @st_param_v4_f32_rirr(float %a, float %c, float %d) {
 ; CHECK-NEXT:    { // callseq 70, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 0f40000000, %r2, %r3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 70
 ; CHECK-NEXT:    ret;
   %struct.rirr0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1709,11 +1425,7 @@ define void @st_param_v4_f32_rrir(float %a, float %b, float %d) {
 ; CHECK-NEXT:    { // callseq 71, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, %r2, 0f40400000, %r3};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 71
 ; CHECK-NEXT:    ret;
   %struct.rrir0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1735,11 +1447,7 @@ define void @st_param_v4_f32_rrri(float %a, float %b, float %c) {
 ; CHECK-NEXT:    { // callseq 72, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, %r2, %r3, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 72
 ; CHECK-NEXT:    ret;
   %struct.rrri0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1760,11 +1468,7 @@ define void @st_param_v4_f32_iirr(float %c, float %d) {
 ; CHECK-NEXT:    { // callseq 73, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, 0f40000000, %r1, %r2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 73
 ; CHECK-NEXT:    ret;
   %struct.iirr0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1785,11 +1489,7 @@ define void @st_param_v4_f32_irir(float %b, float %d) {
 ; CHECK-NEXT:    { // callseq 74, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, %r1, 0f40400000, %r2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 74
 ; CHECK-NEXT:    ret;
   %struct.irir0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1810,11 +1510,7 @@ define void @st_param_v4_f32_irri(float %b, float %c) {
 ; CHECK-NEXT:    { // callseq 75, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, %r1, %r2, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 75
 ; CHECK-NEXT:    ret;
   %struct.irri0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1835,11 +1531,7 @@ define void @st_param_v4_f32_riir(float %a, float %d) {
 ; CHECK-NEXT:    { // callseq 76, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 0f40000000, 0f40400000, %r2};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 76
 ; CHECK-NEXT:    ret;
   %struct.riir0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1860,11 +1552,7 @@ define void @st_param_v4_f32_riri(float %a, float %c) {
 ; CHECK-NEXT:    { // callseq 77, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 0f40000000, %r2, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 77
 ; CHECK-NEXT:    ret;
   %struct.riri0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1885,11 +1573,7 @@ define void @st_param_v4_f32_rrii(float %a, float %b) {
 ; CHECK-NEXT:    { // callseq 78, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, %r2, 0f40400000, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 78
 ; CHECK-NEXT:    ret;
   %struct.rrii0 = insertvalue %struct.float4 poison, float %a, 0
@@ -1909,11 +1593,7 @@ define void @st_param_v4_f32_iiir(float %d) {
 ; CHECK-NEXT:    { // callseq 79, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, 0f40000000, 0f40400000, %r1};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 79
 ; CHECK-NEXT:    ret;
   %struct.iiir0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1933,11 +1613,7 @@ define void @st_param_v4_f32_iiri(float %c) {
 ; CHECK-NEXT:    { // callseq 80, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, 0f40000000, %r1, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 80
 ; CHECK-NEXT:    ret;
   %struct.iiri0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1957,11 +1633,7 @@ define void @st_param_v4_f32_irii(float %b) {
 ; CHECK-NEXT:    { // callseq 81, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {0f3F800000, %r1, 0f40400000, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 81
 ; CHECK-NEXT:    ret;
   %struct.irii0 = insertvalue %struct.float4 poison, float 1.0, 0
@@ -1981,11 +1653,7 @@ define void @st_param_v4_f32_riii(float %a) {
 ; CHECK-NEXT:    { // callseq 82, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[16];
 ; CHECK-NEXT:    st.param.v4.b32 [param0], {%r1, 0f40000000, 0f40400000, 0f40800000};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_v4_f32,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_v4_f32, (param0);
 ; CHECK-NEXT:    } // callseq 82
 ; CHECK-NEXT:    ret;
   %struct.riii0 = insertvalue %struct.float4 poison, float %a, 0
@@ -2011,11 +1679,7 @@ define void @st_param_bfloat() {
 ; CHECK-NEXT:    { // callseq 83, 0
 ; CHECK-NEXT:    .param .align 2 .b8 param0[2];
 ; CHECK-NEXT:    st.param.b16 [param0], %rs1;
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    call_bfloat,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni call_bfloat, (param0);
 ; CHECK-NEXT:    } // callseq 83
 ; CHECK-NEXT:    ret;
   %five = bitcast i16 16640 to bfloat
diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll
index 52415b05e03d0..5b31b5e24bc68 100644
--- a/llvm/test/CodeGen/NVPTX/store-undef.ll
+++ b/llvm/test/CodeGen/NVPTX/store-undef.ll
@@ -16,11 +16,7 @@ define void @test_store_param_undef() {
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    { // callseq 0, 0
 ; CHECK-NEXT:    .param .align 16 .b8 param0[32];
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    test_call,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni test_call, (param0);
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    ret;
   call void @test_call(%struct.T undef)
@@ -41,11 +37,7 @@ define void @test_store_param_def(i64 %param0, i32 %param1) {
 ; CHECK-NEXT:    st.param.b64 [param0], %rd1;
 ; CHECK-NEXT:    st.param.v2.b32 [param0+8], {%r2, %r1};
 ; CHECK-NEXT:    st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
-; CHECK-NEXT:    call.uni
-; CHECK-NEXT:    test_call,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni test_call, (param0);
 ; CHECK-NEXT:    } // callseq 1
 ; CHECK-NEXT:    ret;
   %V2 = insertelement <2 x i32> undef, i32 %param1, i32 1
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index a97a8b5822f99..d6961a9541776 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -71,11 +71,7 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) {
 ; CHECK-NEXT:    .param .b64 param0;
 ; CHECK-NEXT:    st.param.b64 [param0], %rd3;
 ; CHECK-NEXT:    .param .b32 retval0;
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    texfunc,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), texfunc, (param0);
 ; CHECK-NEXT:    ld.param.b32 %r6, [retval0];
 ; CHECK-NEXT:    } // callseq 0
 ; CHECK-NEXT:    add.rn.f32 %r8, %r2, %r6;
diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
index efbac868dba38..178ee7ff6db18 100644
--- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
@@ -33,11 +33,7 @@
 ; CHECK-DAG:    st.param.b8  [param0+3], [[P2_1_or]];
 ; CHECK-DAG:    st.param.b8  [param0+4], [[P2_1]];
 ; CHECK:        .param .align 8 .b8 retval0[16];
-; CHECK-NEXT:   call.uni (retval0),
-; CHECK-NEXT:   test_s_i8i16p,
-; CHECK-NEXT:   (
-; CHECK-NEXT:   param0
-; CHECK-NEXT:   );
+; CHECK-NEXT:   call.uni (retval0), test_s_i8i16p, (param0);
 ; CHECK-DAG:    ld.param.b16 [[R0:%rs[0-9]+]],   [retval0];
 ; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+3];
 ; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+4];
@@ -80,11 +76,7 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
 ; CHECK-DAG:    st.param.b8  [param0+7], [[P2_2_shr]];
 ; CHECK-DAG:    st.param.b8  [param0+8], [[P2_3]];
 ; CHECK:        .param .align 8 .b8 retval0[24];
-; CHECK-NEXT:   call.uni (retval0),
-; CHECK-NEXT:   test_s_i8i32p,
-; CHECK-NEXT:   (
-; CHECK-NEXT:   param0
-; CHECK-NEXT:   );
+; CHECK-NEXT:   call.uni (retval0), test_s_i8i32p, (param0);
 ; CHECK-DAG:    ld.param.b32 [[R0:%r[0-9]+]],   [retval0];
 ; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+5];
 ; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -147,11 +139,7 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
 ; CHECK-DAG:    st.param.b8  [param0+15], [[P2_bfe_5]];
 ; CHECK-DAG:    st.param.b8  [param0+16], [[P2_bfe_6]];
 ; CHECK:        .param .align 8 .b8 retval0[32];
-; CHECK-NEXT:   call.uni (retval0),
-; CHECK-NEXT:   test_s_i8i64p,
-; CHECK-NEXT:   (
-; CHECK-NEXT:   param0
-; CHECK-NEXT:   );
+; CHECK-NEXT:   call.uni (retval0), test_s_i8i64p, (param0);
 ; CHECK-DAG:    ld.param.b64 [[R0:%rd[0-9]+]],   [retval0];
 ; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+9];
 ; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+10];
@@ -192,11 +180,7 @@ define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
 ; CHECK-DAG:    st.param.b8  [param0+3], [[P2_1_or]];
 ; CHECK-DAG:    st.param.b8  [param0+4], [[P2_1]];
 ; CHECK:        .param .align 8 .b8 retval0[16];
-; CHECK-NEXT:   call.uni (retval0),
-; CHECK-NEXT:   test_s_i8f16p,
-; CHECK-NEXT:   (
-; CHECK-NEXT:   param0
-; CHECK-NEXT:   );
+; CHECK-NEXT:   call.uni (retval0), test_s_i8f16p, (param0);
 ; CHECK-DAG:    ld.param.b16 [[R0:%rs[0-9]+]],     [retval0];
 ; CHECK-DAG:    ld.param.b8  [[R2I_0:%rs[0-9]+]], [retval0+3];
 ; CHECK-DAG:    ld.param.b8  [[R2I_1:%rs[0-9]+]], [retval0+4];
@@ -239,11 +223,7 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
 ; CHECK-DAG:    st.param.b8  [param0+7], [[P2_2_shr]];
 ; CHECK-DAG:    st.param.b8  [param0+8], [[P2_3]];
 ; CHECK:        .param .align 8 .b8 retval0[24];
-; CHECK-NEXT:   call.uni (retval0),
-; CHECK-NEXT:   test_s_i8f16x2p,
-; CHECK-NEXT:   (
-; CHECK-NEXT:   param0
-; CHECK-NEXT:   );
+; CHECK-NEXT:   call.uni (retval0), test_s_i8f16x2p, (param0);
 ; CHECK-DAG:    ld.param.b32 [[R0:%r[0-9]+]],   [retval0];
 ; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+5];
 ; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -286,11 +266,7 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
 ; CHECK-DAG:    st.param.b8  [param0+7], [[P2_2_shr]];
 ; CHECK-DAG:    st.param.b8  [param0+8], [[P2_3]];
 ; CHECK:        .param .align 8 .b8 retval0[24];
-; CHECK-NEXT:   call.uni (retval0),
-; CHECK-NEXT:   test_s_i8f32p,
-; CHECK-NEXT:   (
-; CHECK-NEXT:   param0
-; CHECK-NEXT:   );
+; CHECK-NEXT:   call.uni (retval0), test_s_i8f32p, (param0);
 ; CHECK-DAG:    ld.param.b32 [[R0:%r[0-9]+]],    [retval0];
 ; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+5];
 ; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+6];
@@ -353,11 +329,7 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
 ; CHECK-DAG:    st.param.b8  [param0+15], [[P2_bfe_5]];
 ; CHECK-DAG:    st.param.b8  [param0+16], [[P2_bfe_6]];
 ; CHECK:        .param .align 8 .b8 retval0[32];
-; CHECK-NEXT:   call.uni (retval0),
-; CHECK-NEXT:   test_s_i8f64p,
-; CHECK-NEXT:   (
-; CHECK-NEXT:   param0
-; CHECK-NEXT:   );
+; CHECK-NEXT:   call.uni (retval0), test_s_i8f64p, (param0);
 ; CHECK-DAG:    ld.param.b64 [[R0:%rd[0-9]+]],   [retval0];
 ; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+9];
 ; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+10];
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 80cf938d48b53..618c7ed0c4997 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -28,10 +28,7 @@ define ptx_kernel void @kernel_func() {
 ; NO-TRAP-UNREACHABLE-EMPTY:
 ; NO-TRAP-UNREACHABLE-NEXT:  // %bb.0:
 ; NO-TRAP-UNREACHABLE-NEXT:    { // callseq 0, 0
-; NO-TRAP-UNREACHABLE-NEXT:    call.uni
-; NO-TRAP-UNREACHABLE-NEXT:    throw,
-; NO-TRAP-UNREACHABLE-NEXT:    (
-; NO-TRAP-UNREACHABLE-NEXT:    );
+; NO-TRAP-UNREACHABLE-NEXT:    call.uni throw, ();
 ; NO-TRAP-UNREACHABLE-NEXT:    } // callseq 0
 ; NO-TRAP-UNREACHABLE-NEXT:    // begin inline asm
 ; NO-TRAP-UNREACHABLE-NEXT:    exit;
@@ -43,10 +40,7 @@ define ptx_kernel void @kernel_func() {
 ; NO-TRAP-AFTER-NORETURN-EMPTY:
 ; NO-TRAP-AFTER-NORETURN-NEXT:  // %bb.0:
 ; NO-TRAP-AFTER-NORETURN-NEXT:    { // callseq 0, 0
-; NO-TRAP-AFTER-NORETURN-NEXT:    call.uni
-; NO-TRAP-AFTER-NORETURN-NEXT:    throw,
-; NO-TRAP-AFTER-NORETURN-NEXT:    (
-; NO-TRAP-AFTER-NORETURN-NEXT:    );
+; NO-TRAP-AFTER-NORETURN-NEXT:    call.uni throw, ();
 ; NO-TRAP-AFTER-NORETURN-NEXT:    } // callseq 0
 ; NO-TRAP-AFTER-NORETURN-NEXT:    // begin inline asm
 ; NO-TRAP-AFTER-NORETURN-NEXT:    exit;
@@ -59,10 +53,7 @@ define ptx_kernel void @kernel_func() {
 ; TRAP-EMPTY:
 ; TRAP-NEXT:  // %bb.0:
 ; TRAP-NEXT:    { // callseq 0, 0
-; TRAP-NEXT:    call.uni
-; TRAP-NEXT:    throw,
-; TRAP-NEXT:    (
-; TRAP-NEXT:    );
+; TRAP-NEXT:    call.uni throw, ();
 ; TRAP-NEXT:    } // callseq 0
 ; TRAP-NEXT:    trap; exit;
 ;
@@ -72,10 +63,7 @@ define ptx_kernel void @kernel_func() {
 ; BUG-FIXED-EMPTY:
 ; BUG-FIXED-NEXT:  // %bb.0:
 ; BUG-FIXED-NEXT:    { // callseq 0, 0
-; BUG-FIXED-NEXT:    call.uni
-; BUG-FIXED-NEXT:    throw,
-; BUG-FIXED-NEXT:    (
-; BUG-FIXED-NEXT:    );
+; BUG-FIXED-NEXT:    call.uni throw, ();
 ; BUG-FIXED-NEXT:    } // callseq 0
 ; BUG-FIXED-NEXT:    trap;
   call void @throw()
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index ddaa9fd831af7..ca1b722527a89 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -126,12 +126,7 @@ define dso_local i32 @foo() {
 ; CHECK-PTX-NEXT:    .param .b64 param1;
 ; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd4;
 ; CHECK-PTX-NEXT:    .param .b32 retval0;
-; CHECK-PTX-NEXT:    call.uni (retval0),
-; CHECK-PTX-NEXT:    variadics1,
-; CHECK-PTX-NEXT:    (
-; CHECK-PTX-NEXT:    param0,
-; CHECK-PTX-NEXT:    param1
-; CHECK-PTX-NEXT:    );
+; CHECK-PTX-NEXT:    call.uni (retval0), variadics1, (param0, param1);
 ; CHECK-PTX-NEXT:    ld.param.b32 %r2, [retval0];
 ; CHECK-PTX-NEXT:    } // callseq 0
 ; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r2;
@@ -238,12 +233,7 @@ define dso_local i32 @bar() {
 ; CHECK-PTX-NEXT:    .param .b64 param1;
 ; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd4;
 ; CHECK-PTX-NEXT:    .param .b32 retval0;
-; CHECK-PTX-NEXT:    call.uni (retval0),
-; CHECK-PTX-NEXT:    variadics2,
-; CHECK-PTX-NEXT:    (
-; CHECK-PTX-NEXT:    param0,
-; CHECK-PTX-NEXT:    param1
-; CHECK-PTX-NEXT:    );
+; CHECK-PTX-NEXT:    call.uni (retval0), variadics2, (param0, param1);
 ; CHECK-PTX-NEXT:    ld.param.b32 %r2, [retval0];
 ; CHECK-PTX-NEXT:    } // callseq 1
 ; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r2;
@@ -315,12 +305,7 @@ define dso_local i32 @baz() {
 ; CHECK-PTX-NEXT:    .param .b64 param1;
 ; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd1;
 ; CHECK-PTX-NEXT:    .param .b32 retval0;
-; CHECK-PTX-NEXT:    call.uni (retval0),
-; CHECK-PTX-NEXT:    variadics3,
-; CHECK-PTX-NEXT:    (
-; CHECK-PTX-NEXT:    param0,
-; CHECK-PTX-NEXT:    param1
-; CHECK-PTX-NEXT:    );
+; CHECK-PTX-NEXT:    call.uni (retval0), variadics3, (param0, param1);
 ; CHECK-PTX-NEXT:    ld.param.b32 %r2, [retval0];
 ; CHECK-PTX-NEXT:    } // callseq 2
 ; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r2;
@@ -397,12 +382,7 @@ define dso_local void @qux() {
 ; CHECK-PTX-NEXT:    .param .b64 param1;
 ; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd8;
 ; CHECK-PTX-NEXT:    .param .b32 retval0;
-; CHECK-PTX-NEXT:    call.uni (retval0),
-; CHECK-PTX-NEXT:    variadics4,
-; CHECK-PTX-NEXT:    (
-; CHECK-PTX-NEXT:    param0,
-; CHECK-PTX-NEXT:    param1
-; CHECK-PTX-NEXT:    );
+; CHECK-PTX-NEXT:    call.uni (retval0), variadics4, (param0, param1);
 ; CHECK-PTX-NEXT:    ld.param.b32 %r1, [retval0];
 ; CHECK-PTX-NEXT:    } // callseq 3
 ; CHECK-PTX-NEXT:    ret;
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
index b8779b9d54ea7..f466b1de9fb5a 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
@@ -19,11 +19,7 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
 ; CHECK-NEXT:    st.param.v2.b64 [param0], {%rd2, %rd1};
 ; CHECK-NEXT:    st.param.v2.b64 [param0+16], {%rd4, %rd3};
 ; CHECK-NEXT:    .param .align 16 .b8 retval0[32];
-; CHECK-NEXT:    call.uni (retval0),
-; CHECK-NEXT:    callee_St8x4,
-; CHECK-NEXT:    (
-; CHECK-NEXT:    param0
-; CHECK-NEXT:    );
+; CHECK-NEXT:    call.uni (retval0), callee_St8x4, (param0);
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd5, %rd6}, [retval0];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];
 ; CHECK-NEXT:    } // callseq 0

>From e365cfe71946b4c1bcea4867942487a60fe0aa3c Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 25 Jun 2025 02:45:41 +0000
Subject: [PATCH 3/3] address comments

---
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 12 --------
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h   |  1 -
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td     | 31 ++++++++++++---------
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 849274f3678ca..61fe8a53cb63a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -163,10 +163,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
     if (tryStoreParam(N))
       return;
     break;
-  case ISD::INTRINSIC_WO_CHAIN:
-    if (tryIntrinsicNoChain(N))
-      return;
-    break;
   case ISD::INTRINSIC_W_CHAIN:
     if (tryIntrinsicChain(N))
       return;
@@ -902,14 +898,6 @@ NVPTXDAGToDAGISel::insertMemoryInstructionFence(SDLoc DL, SDValue &Chain,
   return {InstructionOrdering, Scope};
 }
 
-bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
-  unsigned IID = N->getConstantOperandVal(0);
-  switch (IID) {
-  default:
-    return false;
-  }
-}
-
 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
   SDValue Src = N->getOperand(0);
   AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ff58e4486a222..92b5c773258ed 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -69,7 +69,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
 #include "NVPTXGenDAGISel.inc"
 
   void Select(SDNode *N) override;
-  bool tryIntrinsicNoChain(SDNode *N);
   bool tryIntrinsicChain(SDNode *N);
   bool tryIntrinsicVoid(SDNode *N);
   void SelectTexSurfHandle(SDNode *N);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index bc506dd8a7114..1ea6d98a1df8e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1741,11 +1741,14 @@ def FMOV64i : MOVi<B64, "b64", f64, f64imm, fpimm>;
 
 
 def to_tglobaladdr : SDNodeXForm<globaladdr, [{
-  return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N), N->getValueType(0), N->getOffset(), N->getTargetFlags());
+  return CurDAG->getTargetGlobalAddress(N->getGlobal(), SDLoc(N),
+                                        N->getValueType(0), N->getOffset(),
+                                        N->getTargetFlags());
 }]>;
 
 def to_texternsym : SDNodeXForm<externalsym, [{
-  return CurDAG->getTargetExternalSymbol(N->getSymbol(), N->getValueType(0), N->getTargetFlags());
+  return CurDAG->getTargetExternalSymbol(N->getSymbol(), N->getValueType(0),
+                                         N->getTargetFlags());
 }]>;
 
 def to_tframeindex : SDNodeXForm<frameindex, [{
@@ -2139,30 +2142,32 @@ foreach is_convergent = [0, 1] in {
 
   let isCall = 1, isConvergent = is_convergent in {
     def CALL # convergent_suffix :
-      NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, i32imm:$proto),
+      NVPTXInst<(outs),
+                (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params, 
+                     i32imm:$proto),
                 "call${rets:RetList} $addr, (${params:ParamList}), prototype_$proto;", []>;
 
     def CALL_UNI # convergent_suffix :
-      NVPTXInst<(outs), (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
+      NVPTXInst<(outs),
+                (ins ADDR_base:$addr, CallOperand:$rets, CallOperand:$params),
                 "call.uni${rets:RetList} $addr, (${params:ParamList});", []>;
   }
 
-  defvar inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
-  defvar inst_uni = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
-
+  defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix);
   def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto),
-            (inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
+            (call_inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>;
   def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto),
-            (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+            (call_inst $addr, imm:$rets, imm:$params, imm:$proto)>;
   def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto),
-            (inst $addr, imm:$rets, imm:$params, imm:$proto)>;
+            (call_inst $addr, imm:$rets, imm:$params, imm:$proto)>;
 
+  defvar call_uni_inst = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix);
   def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0),
-            (inst_uni (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
+            (call_uni_inst (to_tglobaladdr $addr), imm:$rets, imm:$params)>;
   def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0),
-            (inst_uni $addr, imm:$rets, imm:$params)>;
+            (call_uni_inst $addr, imm:$rets, imm:$params)>;
   def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 0),
-            (inst_uni $addr, imm:$rets, imm:$params)>;
+            (call_uni_inst $addr, imm:$rets, imm:$params)>;
 }
 
 def LoadParamMemI64    : LoadParamMemInst<B64, ".b64">;



More information about the cfe-commits mailing list