[llvm] [NVPTX] Add TMA bulk tensor copy intrinsics (PR #96083)
Durgadoss R via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 07:54:31 PDT 2024
================
@@ -4091,3 +4096,246 @@ unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
}
}
}
+
+// Map a cp.async.bulk.tensor intrinsic ID to the tensor rank (1-5)
+// encoded in the intrinsic's "_Nd" name suffix. Each rank has two
+// direction variants: smem_to_gmem (shared->global) and gmem_to_smem
+// (global->shared); both map to the same dimension count.
+static size_t GetCpAsyncBulkTensorDimFromIntrinsic(unsigned IID) {
+ switch (IID) {
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_1d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_1d:
+ return 1;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_2d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_2d:
+ return 2;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_3d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_3d:
+ return 3;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_4d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_4d:
+ return 4;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_5d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_5d:
+ return 5;
+ default:
+ // Callers are expected to pass only the intrinsics listed above.
+ llvm_unreachable(
+ "Invalid Tensor dim in nvvm_cp_async_bulk_tensor intrinsic");
+ }
+}
+
+// Expands to a 'return' of the NVPTX pseudo-opcode
+// CP_ASYNC_BULK_TENSOR_<dir>_<dim>_[SHARED32_]<mode><suffix>, choosing
+// the SHARED32 variant when the shared-memory pointer is 32-bit.
+// NOTE(review): relies on a local 'IsShared32' being in scope at every
+// expansion site.
+#define CP_ASYNC_BULK_TENSOR_OPCODE(dir, dim, mode, suffix) \
+ if (IsShared32) { \
+ return NVPTX:: \
+ CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix; \
+ } else { \
+ return NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix; \
+ }
+
+// Returns the shared->global (SMEM_TO_GMEM) copy opcode for the given
+// dim/mode, selecting the cache-hint variant (_CH suffix) when a cache
+// hint is present. NOTE(review): relies on locals 'IsCacheHint' and
+// 'IsShared32' (via CP_ASYNC_BULK_TENSOR_OPCODE) at the expansion site.
+#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(dim, mode) \
+ do { \
+ if (IsCacheHint) { \
+ CP_ASYNC_BULK_TENSOR_OPCODE(SMEM_TO_GMEM, dim, mode, _CH); \
+ } else { \
+ CP_ASYNC_BULK_TENSOR_OPCODE(SMEM_TO_GMEM, dim, mode, ); \
+ } \
+ } while (0)
+
+#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_G2S(dim, mode) \
+ do { \
+ if (IsMultiCast && IsCacheHint) { \
+ CP_ASYNC_BULK_TENSOR_OPCODE(GMEM_TO_SMEM, dim, mode, _MC_CH); \
----------------
durga4github wrote:
Yes, moved this to a lambda in the latest revision. Please let me know if it looks good.
https://github.com/llvm/llvm-project/pull/96083
More information about the llvm-commits
mailing list