[flang-commits] [flang] [flang][cuda] Add interfaces and lowering for tma_bulk_load (PR #165474)
Zhen Wang via flang-commits
flang-commits at lists.llvm.org
Tue Oct 28 14:05:37 PDT 2025
================
@@ -2067,6 +2067,67 @@ attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes)
end subroutine
end interface
+ ! Load specific types, count is in elements
+ ! -----------------------------------------
+ interface tma_bulk_load
+ attributes(device) subroutine tma_bulk_ldi4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ integer(4), device :: src(*)
+ integer(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldi8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ integer(8), device :: src(*)
+ integer(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr2(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(2), device :: src(*)
+ real(2), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(4), device :: src(*)
+ real(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(8), device :: src(*)
+ real(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldc4(barrier, src, dst, nelems)
----------------
wangzpgi wrote:
Can you move these two (c4, c8) before i4 so the order aligns with other places?
https://github.com/llvm/llvm-project/pull/165474
More information about the flang-commits mailing list