[llvm] [NVPTX] Add TMA bulk tensor copy intrinsics (PR #96083)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 11:20:33 PDT 2024
================
@@ -4091,3 +4096,246 @@ unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
}
}
}
+
+static size_t GetCpAsyncBulkTensorDimFromIntrinsic(unsigned IID) {
+ switch (IID) {
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_1d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_1d:
+ return 1;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_2d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_2d:
+ return 2;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_3d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_3d:
+ return 3;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_4d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_4d:
+ return 4;
+ case Intrinsic::nvvm_cp_async_bulk_tensor_smem_to_gmem_5d:
+ case Intrinsic::nvvm_cp_async_bulk_tensor_gmem_to_smem_5d:
+ return 5;
+ default:
+ llvm_unreachable(
+ "Invalid Tensor dim in nvvm_cp_async_bulk_tensor intrinsic");
+ }
+}
+
+#define CP_ASYNC_BULK_TENSOR_OPCODE(dir, dim, mode, suffix) \
+ if (IsShared32) { \
+ return NVPTX:: \
+ CP_ASYNC_BULK_TENSOR_##dir##_##dim##_SHARED32_##mode##suffix; \
+ } else { \
+ return NVPTX::CP_ASYNC_BULK_TENSOR_##dir##_##dim##_##mode##suffix; \
+ }
+
+#define GET_CP_ASYNC_BULK_TENSOR_OPCODE_S2G(dim, mode) \
+ do { \
----------------
Artem-B wrote:
This could also be trimmed down to just `return IsCacheHint ? ... : ...`.
https://github.com/llvm/llvm-project/pull/96083
More information about the llvm-commits mailing list