[llvm] [NVPTX][NFC] Move more TMA lowering to tablegen (PR #140914)
Durgadoss R via llvm-commits
llvm-commits at lists.llvm.org
Thu May 22 06:06:00 PDT 2025
================
@@ -42,16 +42,16 @@ define void @cp_async_bulk_g2s(ptr addrspace(1) %src, ptr addrspace(3) %bar, ptr
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [cp_async_bulk_g2s_param_1];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_g2s_param_2];
; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r3, [cp_async_bulk_g2s_param_3];
-; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%r2], [%rd1], %r3, [%r1];
+; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_g2s_param_4];
; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_g2s_param_5];
+; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%r2], [%rd1], %r3, [%r1];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes.L2::cache_hint [%r2], [%rd1], %r3, [%r1], %rd2;
-; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_g2s_param_4];
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes.multicast::cluster [%r2], [%rd1], %r3, [%r1], %rs1;
; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes.multicast::cluster.L2::cache_hint [%r2], [%rd1], %r3, [%r1], %rs1, %rd2;
; CHECK-PTX-SHARED32-NEXT: ret;
- tail call void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr addrspace(1) %src, i32 %size, i16 0, i64 0, i1 0, i1 0)
- tail call void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr addrspace(1) %src, i32 %size, i16 0, i64 %ch, i1 0, i1 1)
- tail call void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr addrspace(1) %src, i32 %size, i16 %mc, i64 0, i1 1, i1 0)
+ tail call void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr addrspace(1) %src, i32 %size, i16 %mc, i64 %ch, i1 0, i1 0)
+ tail call void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr addrspace(1) %src, i32 %size, i16 %mc, i64 %ch, i1 0, i1 1)
+ tail call void @llvm.nvvm.cp.async.bulk.global.to.shared.cluster(ptr addrspace(7) %dst, ptr addrspace(3) %bar, ptr addrspace(1) %src, i32 %size, i16 %mc, i64 %ch, i1 1, i1 0)
----------------
durga4github wrote:
In the earlier version, we passed constants for the cases where the `mc/ch` were ignored (since the `flag` is false). So, the tests were not meaningful.
In the latest revision, I have added more tests where the `mc/ch` are constants and are being used.
https://github.com/llvm/llvm-project/pull/140914
More information about the llvm-commits
mailing list