[llvm] [NVPTX] Add TMA bulk tensor reduction intrinsics (PR #116854)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 10:41:41 PST 2024
================
@@ -596,11 +598,37 @@ multiclass CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, bit shared32, string mode> {
Requires<[hasPTX<80>, hasSM<90>]>;
}
+def TMAReductionFlags : Operand<i32> {
+ let PrintMethod = "printTmaReductionMode";
+}
+
+// TMA Copy from Shared to Global memory with Reduction
+multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, bit shared32, string mode> {
+ defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
+ defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]";
+ defvar rc = !if(shared32, Int32Regs, Int64Regs);
+
+ defvar prefix = "cp.reduce.async.bulk.tensor" # "." # dim # "d" # ".global.shared::cta";
+ defvar suffix = "." # mode # ".bulk_group";
+
+ def "": NVPTXInst<(outs),
----------------
Artem-B wrote:
I think you can use the magic `NAME` identifier here, which may look a bit less odd than `""`.
```
def NAME: ...
def NAME # _CH: ...
```
https://github.com/llvm/llvm-project/pull/116854
More information about the llvm-commits mailing list