[Mlir-commits] [mlir] [MLIR][NVVM] Extend TMA Bulk Copy Op (PR #140232)
Durgadoss R
llvmlistbot at llvm.org
Fri May 16 05:18:41 PDT 2025
================
@@ -2599,51 +2599,48 @@ def NVVM_CpAsyncBulkSharedCTAToSharedClusterOp :
}
def NVVM_CpAsyncBulkSharedCTAToGlobalOp :
- NVVM_Op<"cp.async.bulk.global.shared.cta"> {
+ NVVM_Op<"cp.async.bulk.global.shared.cta", [AttrSizedOperandSegments]> {
let summary = "Async bulk copy from Shared CTA memory to Global memory";
let description = [{
Initiates an asynchronous copy operation from Shared CTA memory to
- global memory.
+ global memory. The 32-bit operand `size` specifies the amount of
+ memory to be copied, in terms of number of bytes. `size` must be a
+ multiple of 16. The `l2CacheHint` operand is optional, and it is used
+ to specify cache eviction policy that may be used during the memory
+ access. The `byteMask` operand is optional. The i-th bit in the 16-bit
+ wide `byteMask` specifies whether the i-th byte of each 16-byte wide
+ chunk of source data is copied to the destination. If the bit is set,
+ the byte is copied.
- The `l2CacheHint` operand is optional, and it is used to specify cache
- eviction policy that may be used during the memory access.
-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
}];
let arguments = (ins
LLVM_PointerGlobal:$dstMem,
LLVM_PointerShared:$srcMem,
I32:$size,
- Optional<I64>:$l2CacheHint);
+ Optional<I64>:$l2CacheHint,
+ Optional<I16>:$byteMask);
let assemblyFormat = [{
$dstMem `,` $srcMem `,` $size
(`l2_cache_hint` `=` $l2CacheHint^ )?
- attr-dict `:` type($dstMem) `,` type($srcMem)
+ (`byte_mask` `=` $byteMask^ )?
+ attr-dict `:` type($dstMem) `,` type($srcMem)
}];
+ let extraClassDeclaration = [{
+ static llvm::Intrinsic::ID
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::SmallVector<llvm::Value *> &args,
+ llvm::IRBuilderBase& builder);
+ }];
string llvmBuilder = [{
- // Arguments to the intrinsic:
- // dst, src, size, cache_hint,
- // Flag for cache_hint
- //
- llvm::SmallVector<llvm::Value *> translatedOperands;
- translatedOperands.push_back($dstMem);
- translatedOperands.push_back($srcMem);
- translatedOperands.push_back($size);
-
- // Cachehint, if available
- llvm::LLVMContext &ctx = moduleTranslation.getLLVMContext();
- auto *i64Unused = llvm::ConstantInt::get(llvm::Type::getInt64Ty(ctx), 0);
- bool isCacheHint = op.getL2CacheHint() ? true : false;
- translatedOperands.push_back(isCacheHint ? $l2CacheHint : i64Unused);
-
- // Flag argument for cachehint
- translatedOperands.push_back(builder.getInt1(isCacheHint));
-
- createIntrinsicCall(builder,
- llvm::Intrinsic::nvvm_cp_async_bulk_shared_cta_to_global, translatedOperands);
+ llvm::SmallVector<llvm::Value *> args;
+ llvm::Intrinsic::ID id =
+ NVVM::CpAsyncBulkSharedCTAToGlobalOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, args, builder);
----------------
durga4github wrote:
Sure. Both the `id` and `args` are updated within the function now.
https://github.com/llvm/llvm-project/pull/140232
More information about the Mlir-commits
mailing list