[Mlir-commits] [mlir] [MLIR][NVVM][Refactor] Refactor intrinsic lowering for NVVM Ops (PR #157079)

Fri Sep 5 04:42:18 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-mlir-llvm

Author: Srinivasa Ravi (Wolfram70)

<details>
<summary>Changes</summary>

This change adds standardizes the usage of `getIntrinsicIDAndArgsMaybeWithTypes`
across NVVM Ops for intrinsic lowering which returns the intrinsic ID, arguments,
and in the case of overloaded intrinsics, the types of the arguments as well.

It also moves the `get*Intrinsic*` functions defined in `NVVMToLLVMIRTranslation.cpp`
to `NVVMDialect.cpp` and refactors them to be of the same signature as other 
`getIntrinsicIDAndArgsMaybeWithTypes` functions to keep consistency and allow for a 
future refactor.

---

Patch is 84.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157079.diff


4 Files Affected:

- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h (+8-4) 
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+161-163) 
- (modified) mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp (+781-97) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp (-374) 


``````````diff

diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
index 6137bb087c576..3c463db548011 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
@@ -55,10 +55,14 @@ enum NVVMMemorySpace {
   kSharedClusterMemorySpace = 7,
 };
 
-/// A pair type of LLVM's Intrinsic ID and args (which are llvm values).
-/// This type is returned by the getIntrinsicIDAndArgs() methods.
-using IDArgPair =
-    std::pair<llvm::Intrinsic::ID, llvm::SmallVector<llvm::Value *>>;
+/// A tuple type of LLVM's Intrinsic ID, args (which are llvm values),
+/// and args types (which are llvm types).
+/// Args types are only required for overloaded intrinsics to provide the
+/// correct argument types to the createIntrinsicCall() method.
+/// This type is returned by the getIIDAndArgsWithTypes() methods.
+using IIDArgsWithTypes =
+    std::tuple<llvm::Intrinsic::ID, llvm::SmallVector<llvm::Value *>,
+               llvm::SmallVector<llvm::Type *>>;
 
 /// Return the element type and number of elements associated with a wmma matrix
 /// of given chracteristics. This matches the logic in IntrinsicsNVVM.td
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 9d93b4efe7a5b..168060aae2c3e 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -410,9 +410,16 @@ def NVVM_ReduxOp :
 
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-redux-sync)
   }];
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   string llvmBuilder = [{
-      auto intId = getReduxIntrinsicId($_resultType, $kind, $abs, $nan);
-      $res = createIntrinsicCall(builder, intId, {$val, $mask_and_clamp});
+      auto [id, args, types] = 
+      NVVM::ReduxOp::getIIDAndArgsWithTypes(
+                        *op, moduleTranslation, builder);
+      $res = createIntrinsicCall(builder, id, args);
   }];
   let assemblyFormat = [{
     $kind $val `,` $mask_and_clamp  attr-dict `:` type($val) `->` type($res)
@@ -876,11 +883,17 @@ def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">,
   }];
 
   let assemblyFormat = "$scope $addr `,` $size (`from_proxy` `=` $fromProxy^)? (`to_proxy` `=` $toProxy^)? attr-dict";
+  
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   let llvmBuilder = [{
-    createIntrinsicCall(
-        builder,
-        getUnidirectionalFenceProxyID($fromProxy, $toProxy, $scope, false),
-        {$addr, $size});
+    auto [intId, args, types] = 
+    NVVM::FenceProxyAcquireOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
+    createIntrinsicCall(builder, intId, args);
   }];
 
   let hasVerifier = 1;
@@ -904,9 +917,16 @@ def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">,
   }];
 
   let assemblyFormat = "$scope (`from_proxy` `=` $fromProxy^)? (`to_proxy` `=` $toProxy^)? attr-dict";
+
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   let llvmBuilder = [{
-    createIntrinsicCall(builder, getUnidirectionalFenceProxyID(
-                                     $fromProxy, $toProxy, $scope, true));
+    auto [intId, args, types] = NVVM::FenceProxyReleaseOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
+    createIntrinsicCall(builder, intId, args);
   }];
 
   let hasVerifier = 1;
@@ -985,11 +1005,15 @@ def NVVM_ShflOp :
     
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-shfl-sync)
   }];
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   string llvmBuilder = [{
-      auto intId = getShflIntrinsicId(
-          $_resultType, $kind, static_cast<bool>($return_value_and_is_valid));
-      $res = createIntrinsicCall(builder,
-          intId, {$thread_mask, $val, $offset, $mask_and_clamp});
+      auto [intId, args, types] = NVVM::ShflOp::getIIDAndArgsWithTypes(
+          *op, moduleTranslation, builder);
+      $res = createIntrinsicCall(builder, intId, args);
   }];
   let assemblyFormat = [{
     $kind $thread_mask `,` $val `,` $offset `,` $mask_and_clamp  attr-dict
@@ -1035,9 +1059,16 @@ def NVVM_VoteSyncOp
 
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-vote-sync)
   }];
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+      getIIDAndArgsWithTypes(Operation &op,
+        LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   string llvmBuilder = [{
-    auto intId = getVoteSyncIntrinsicId($kind);
-    $res = createIntrinsicCall(builder, intId, {$mask, $pred});
+    auto [intId, args, types] = 
+    NVVM::VoteSyncOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
+    $res = createIntrinsicCall(builder, intId, args);
   }];
   let assemblyFormat = "$kind $mask `,` $pred attr-dict `->` type($res)";
   let hasVerifier = 1;
@@ -1108,15 +1139,14 @@ def NVVM_CpAsyncOp : NVVM_Op<"cp.async.shared.global">,
   let assemblyFormat = "$dst `,` $src `,` $size `,` `cache` `=` $modifier (`,` $cpSize^)? attr-dict `:` type(operands)";
   let hasVerifier = 1;
   let extraClassDeclaration = [{
-    static llvm::Intrinsic::ID
-      getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
-                            llvm::SmallVector<llvm::Value *> &args);
+    static NVVM::IIDArgsWithTypes
+      getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
+                            llvm::IRBuilderBase &builder);
   }];
   string llvmBuilder = [{
-    llvm::SmallVector<llvm::Value *> translatedOperands;
-    auto id = NVVM::CpAsyncOp::getIntrinsicIDAndArgs(
-      *op, moduleTranslation, translatedOperands);
-    createIntrinsicCall(builder, id, translatedOperands);
+    auto [id, args, types] = NVVM::CpAsyncOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
+    createIntrinsicCall(builder, id, args);
   }];
 }
 
@@ -2107,10 +2137,16 @@ def NVVM_StMatrixOp: NVVM_Op<"stmatrix">,
     
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-stmatrix)
   }];
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   string llvmBuilder = [{
-      auto operands = moduleTranslation.lookupValues(opInst.getOperands());
-      auto intId = getStMatrixIntrinsicId($layout, $sources.size(), $shape, $eltType);
-      createIntrinsicCall(builder, intId, operands, operands[0]->getType());
+      auto [intId, args, types] = 
+      NVVM::StMatrixOp::getIIDAndArgsWithTypes(
+        *op, moduleTranslation, builder);
+      createIntrinsicCall(builder, intId, args, types);
   }];
   let assemblyFormat = "$ptr `,` $sources attr-dict `:` type(operands)";
   let hasVerifier = 1;
@@ -2125,10 +2161,16 @@ def NVVM_LdMatrixOp: NVVM_Op<"ldmatrix">,
 
   let summary = "cooperative matrix load";
 
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+      getIIDAndArgsWithTypes(Operation &op,
+        LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   string llvmBuilder = [{
-      auto operands = moduleTranslation.lookupValues(opInst.getOperands());
-      auto intId = getLdMatrixIntrinsicId($layout, $num, $shape, $eltType);
-      $res = createIntrinsicCall(builder, intId, operands, {operands[0]->getType()});
+      auto [intId, args, types] = 
+      NVVM::LdMatrixOp::getIIDAndArgsWithTypes(
+        *op, moduleTranslation, builder);
+      $res = createIntrinsicCall(builder, intId, args, types);
   }];
 
   string baseDescription = [{
@@ -2543,8 +2585,8 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp :
   let extraClassDeclaration = [{
     bool hasIntrinsic() { return !getPredicate(); }
 
-    static mlir::NVVM::IDArgPair
-    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+    static mlir::NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
                           llvm::IRBuilderBase& builder);
   }];
 
@@ -2565,7 +2607,7 @@ def NVVM_CpAsyncBulkTensorSharedCTAToGlobalOp :
   let hasVerifier = 1;
 
   string llvmBuilder = [{
-    auto [id, args] = NVVM::CpAsyncBulkTensorSharedCTAToGlobalOp::getIntrinsicIDAndArgs(
+    auto [id, args, types] = NVVM::CpAsyncBulkTensorSharedCTAToGlobalOp::getIIDAndArgsWithTypes(
                       *op, moduleTranslation, builder);
     createIntrinsicCall(builder, id, args);
   }];
@@ -2631,8 +2673,8 @@ def NVVM_PrefetchOp : NVVM_Op<"prefetch",
   let hasVerifier = 1;
 
   let extraClassDeclaration = [{
-    static NVVM::IDArgPair
-    getIntrinsicIDAndArgs(NVVM::PrefetchOp &op,LLVM::ModuleTranslation &mt,
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
                           llvm::IRBuilderBase &builder);
     bool hasIntrinsic() { return !getPredicate() || !getTensormap(); }
   }];
@@ -2643,7 +2685,7 @@ def NVVM_PrefetchOp : NVVM_Op<"prefetch",
     }
   }];
   let llvmBuilder = [{
-    auto [id, args] = NVVM::PrefetchOp::getIntrinsicIDAndArgs(op,
+    auto [id, args, types] = NVVM::PrefetchOp::getIIDAndArgsWithTypes(*op,
                                           moduleTranslation, builder);
 
     if(op.getTensormap())
@@ -2685,13 +2727,13 @@ def NVVM_CpAsyncBulkPrefetchOp : NVVM_Op<"cp.async.bulk.prefetch"> {
   }];
 
   let extraClassDeclaration = [{
-    static mlir::NVVM::IDArgPair
-    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+    static mlir::NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
                           llvm::IRBuilderBase& builder);
   }];
 
   string llvmBuilder = [{
-    auto [id, args] = NVVM::CpAsyncBulkPrefetchOp::getIntrinsicIDAndArgs(
+    auto [id, args, types] = NVVM::CpAsyncBulkPrefetchOp::getIIDAndArgsWithTypes(
                       *op, moduleTranslation, builder);
     createIntrinsicCall(builder, id, args);
   }];
@@ -2726,15 +2768,15 @@ def NVVM_CpAsyncBulkTensorPrefetchOp :
   }];
 
   let extraClassDeclaration = [{
-    static mlir::NVVM::IDArgPair
-    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+    static mlir::NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
                           llvm::IRBuilderBase& builder);
   }];
 
   let hasVerifier = 1;
 
   string llvmBuilder = [{
-    auto [id, args] = NVVM::CpAsyncBulkTensorPrefetchOp::getIntrinsicIDAndArgs(
+    auto [id, args, types] = NVVM::CpAsyncBulkTensorPrefetchOp::getIIDAndArgsWithTypes(
                       *op, moduleTranslation, builder);
     createIntrinsicCall(builder, id, args);
   }];
@@ -2795,35 +2837,17 @@ def NVVM_CpAsyncBulkTensorReduceOp :
   }];
 
   let extraClassDeclaration = [{
-    static llvm::Intrinsic::ID getIntrinsicID(int tensorDims,
-                                              NVVM::TMAReduxKind kind,
-                                              bool isIm2Col);
+    static mlir::NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase& builder);
   }];
 
   let hasVerifier = 1;
 
   string llvmBuilder = [{
-    // Arguments to the intrinsic:
-    // shared_mem_ptr, tmaDesc, tensorDims
-    // cache_hint(if applicable) and flag(boolean)
-    llvm::SmallVector<llvm::Value *> translatedOperands;
-    translatedOperands.push_back($srcMem);
-    translatedOperands.push_back($tmaDescriptor);
-
-    for (auto v : op.getCoordinates())
-      translatedOperands.push_back(moduleTranslation.lookupValue(v));
-
-    llvm::LLVMContext &ctx = moduleTranslation.getLLVMContext();
-    auto *i64Undef = llvm::UndefValue::get(llvm::IntegerType::get(ctx, 64));
-
-    bool isCacheHint = op.getL2CacheHint() ? true : false;
-    translatedOperands.push_back(isCacheHint ? $l2CacheHint : i64Undef);
-    translatedOperands.push_back(builder.getInt1(isCacheHint));
-
-    auto intId = NVVM::CpAsyncBulkTensorReduceOp::getIntrinsicID(
-                 op.getCoordinates().size(), $redKind,
-                 (op.getMode() == NVVM::TMAStoreMode::IM2COL));
-    createIntrinsicCall(builder, intId, translatedOperands);
+    auto [id, args, types] = NVVM::CpAsyncBulkTensorReduceOp::getIIDAndArgsWithTypes(
+                      *op, moduleTranslation, builder);
+    createIntrinsicCall(builder, id, args);
   }];
 }
 
@@ -2860,36 +2884,17 @@ def NVVM_CpAsyncBulkGlobalToSharedClusterOp :
     (`l2_cache_hint` `=` $l2CacheHint^ )?
     attr-dict  `:` type($dstMem) `,` type($srcMem)
   }];
+  
+  let extraClassDeclaration = [{
+    static mlir::NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase& builder);
+  }];
 
   string llvmBuilder = [{
-    // Arguments to the intrinsic:
-    // dst, mbar, src, size
-    // multicast_mask, cache_hint,
-    // flag for multicast_mask,
-    // flag for cache_hint
-    llvm::SmallVector<llvm::Value *> translatedOperands;
-    translatedOperands.push_back($dstMem);
-    translatedOperands.push_back($mbar);
-    translatedOperands.push_back($srcMem);
-    translatedOperands.push_back($size);
-
-    // Multicast, if available
-    llvm::LLVMContext &ctx = moduleTranslation.getLLVMContext();
-    auto *i16Unused = llvm::ConstantInt::get(llvm::Type::getInt16Ty(ctx), 0);
-    bool isMulticast = op.getMulticastMask() ? true : false;
-    translatedOperands.push_back(isMulticast ? $multicastMask : i16Unused);
-
-    // Cachehint, if available
-    auto *i64Unused = llvm::ConstantInt::get(llvm::Type::getInt64Ty(ctx), 0);
-    bool isCacheHint = op.getL2CacheHint() ? true : false;
-    translatedOperands.push_back(isCacheHint ? $l2CacheHint : i64Unused);
-
-    // Flag arguments for multicast and cachehint
-    translatedOperands.push_back(builder.getInt1(isMulticast));
-    translatedOperands.push_back(builder.getInt1(isCacheHint));
-
-    createIntrinsicCall(builder,
-      llvm::Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster, translatedOperands);
+    auto [id, args, types] = NVVM::CpAsyncBulkGlobalToSharedClusterOp::getIIDAndArgsWithTypes(
+                      *op, moduleTranslation, builder);
+    createIntrinsicCall(builder, id, args);
   }];
 }
 
@@ -2971,12 +2976,12 @@ def NVVM_CpAsyncBulkSharedCTAToGlobalOp :
   }];
 
   let extraClassDeclaration = [{
-    static mlir::NVVM::IDArgPair
-    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+    static mlir::NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
                           llvm::IRBuilderBase& builder);
   }];
   string llvmBuilder = [{
-    auto [id, args] = NVVM::CpAsyncBulkSharedCTAToGlobalOp::getIntrinsicIDAndArgs(
+    auto [id, args, types] = NVVM::CpAsyncBulkSharedCTAToGlobalOp::getIIDAndArgsWithTypes(
                       *op, moduleTranslation, builder);
     createIntrinsicCall(builder, id, args);
   }];
@@ -3276,11 +3281,16 @@ def NVVM_MatchSyncOp : NVVM_Op<"match.sync">,
 
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-match-sync)
   }];
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   string llvmBuilder = [{
-    auto intId = getMatchSyncIntrinsicId(
-        op.getVal().getType(), $kind);
-    $res = createIntrinsicCall(builder,
-        intId, {$thread_mask, $val});
+    auto [intId, args, types] = 
+    NVVM::MatchSyncOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
+    $res = createIntrinsicCall(builder, intId, args);
   }];
   let assemblyFormat = "$kind $thread_mask `,` $val attr-dict `:` type($val) `->` type($res)";
   let hasVerifier = 1;
@@ -3304,11 +3314,16 @@ def NVVM_BulkStoreOp: NVVM_Op<"st.bulk"> {
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-st-bulk)
   }];
 
+  let extraClassDeclaration = [{
+    static NVVM::IIDArgsWithTypes
+    getIIDAndArgsWithTypes(Operation &op,
+      LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+  }];
   string llvmBuilder = [{
-    auto intId = getStBulkIntrinsicId(
-          llvm::cast<LLVM::LLVMPointerType>(op.getAddr().getType()));
-    createIntrinsicCall(builder, intId,
-                      {$addr, $size, builder.getInt64($initVal)});
+    auto [intId, args, types] = 
+    NVVM::BulkStoreOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
+    createIntrinsicCall(builder, intId, args);
   }];
   
   let assemblyFormat = "$addr `,` `size` `=` $size (`,` `init` `=` $initVal^)? attr-dict `:` type($addr)";
@@ -3392,14 +3407,13 @@ def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc", [NVVMRequiresSMa<[100, 101]>]
   let assemblyFormat = "$addr `,` $nCols attr-dict `:` type(operands)";
 
   let extraClassDeclaration = [{
-    static llvm::Intrinsic::ID
-      getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
-                            llvm::SmallVector<llvm::Value *> &args);
+    static NVVM::IIDArgsWithTypes
+      getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
+                            llvm::IRBuilderBase &builder);
   }];
   string llvmBuilder = [{
-    llvm::SmallVector<llvm::Value *> args;
-    auto id = NVVM::Tcgen05AllocOp::getIntrinsicIDAndArgs(
-      *op, moduleTranslation, args);
+    auto [id, args, types] = NVVM::Tcgen05AllocOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
     createIntrinsicCall(builder, id, args);
   }];
 }
@@ -3420,14 +3434,13 @@ def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc", [NVVMRequiresSMa<[100, 10
   let assemblyFormat = "$taddr `,` $nCols attr-dict `:` type(operands)";
 
   let extraClassDeclaration = [{
-    static llvm::Intrinsic::ID
-      getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
-                            llvm::SmallVector<llvm::Value *> &args);
+    static NVVM::IIDArgsWithTypes
+      getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
+                            llvm::IRBuilderBase &builder);
   }];
   string llvmBuilder = [{
-    llvm::SmallVector<llvm::Value *> args;
-    auto id = NVVM::Tcgen05DeallocOp::getIntrinsicIDAndArgs(
-      *op, moduleTranslation, args);
+    auto [id, args, types] = NVVM::Tcgen05DeallocOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
     createIntrinsicCall(builder, id, args);
   }];
 }
@@ -3524,15 +3537,14 @@ def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit", [NVVMRequiresSMa<[100, 101]
   }];
 
   let extraClassDeclaration = [{
-    static llvm::Intrinsic::ID
-      getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
-                            llvm::SmallVector<llvm::Value *> &args);
+    static NVVM::IIDArgsWithTypes
+      getIIDAndArgsWithTypes(Operation &op, LLVM::ModuleTranslation &mt,
+                            llvm::IRBuilderBase &builder);
   }];
 
   string llvmBuilder = [{
-    llvm::SmallVector<llvm::Value *> args;
-    auto id = NVVM::Tcgen05CommitOp::getIntrinsicIDAndArgs(
-      *op, moduleTranslation, args);
+    auto [id, args, types] = NVVM::Tcgen05CommitOp::getIIDAndArgsWithTypes(
+      *op, moduleTranslation, builder);
     createIntrinsicCall(builder, id, args);
   }];
 }
@@ -3636,12 +3648,14 @@ def NVVM_Tcgen05CpOp : NVVM_Op<"tcgen05.cp", [NVVMRequiresSMa<[100,...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/157079