[flang-commits] [flang] [flang][cuda] Add interfaces and lowering for tma_bulk_load (PR #165474)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Oct 28 14:07:26 PDT 2025
================
@@ -2067,6 +2067,67 @@ attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes)
end subroutine
end interface
+ ! Load specific types, count is in elements
+ ! -----------------------------------------
+ interface tma_bulk_load
+ attributes(device) subroutine tma_bulk_ldi4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ integer(4), device :: src(*)
+ integer(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldi8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ integer(8), device :: src(*)
+ integer(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr2(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(2), device :: src(*)
+ real(2), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(4), device :: src(*)
+ real(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(8), device :: src(*)
+ real(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldc4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ complex(4), device :: src(*)
+ complex(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldc8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ complex(8), device :: src(*)
+ complex(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
----------------
clementval wrote:
```suggestion
attributes(device) subroutine tma_bulk_ldc4(barrier, src, dst, nelems)
!dir$ ignore_tkr (r) src, (r) dst
integer(8), shared :: barrier
complex(4), device :: src(*)
complex(4), shared :: dst(*)
integer(4), value :: nelems
end subroutine
attributes(device) subroutine tma_bulk_ldc8(barrier, src, dst, nelems)
!dir$ ignore_tkr (r) src, (r) dst
integer(8), shared :: barrier
complex(8), device :: src(*)
complex(8), shared :: dst(*)
integer(4), value :: nelems
end subroutine
attributes(device) subroutine tma_bulk_ldi4(barrier, src, dst, nelems)
!dir$ ignore_tkr (r) src, (r) dst
integer(8), shared :: barrier
integer(4), device :: src(*)
integer(4), shared :: dst(*)
integer(4), value :: nelems
end subroutine
attributes(device) subroutine tma_bulk_ldi8(barrier, src, dst, nelems)
!dir$ ignore_tkr (r) src, (r) dst
integer(8), shared :: barrier
integer(8), device :: src(*)
integer(8), shared :: dst(*)
integer(4), value :: nelems
end subroutine
attributes(device) subroutine tma_bulk_ldr2(barrier, src, dst, nelems)
!dir$ ignore_tkr (r) src, (r) dst
integer(8), shared :: barrier
real(2), device :: src(*)
real(2), shared :: dst(*)
integer(4), value :: nelems
end subroutine
attributes(device) subroutine tma_bulk_ldr4(barrier, src, dst, nelems)
!dir$ ignore_tkr (r) src, (r) dst
integer(8), shared :: barrier
real(4), device :: src(*)
real(4), shared :: dst(*)
integer(4), value :: nelems
end subroutine
attributes(device) subroutine tma_bulk_ldr8(barrier, src, dst, nelems)
!dir$ ignore_tkr (r) src, (r) dst
integer(8), shared :: barrier
real(8), device :: src(*)
real(8), shared :: dst(*)
integer(4), value :: nelems
end subroutine
```
https://github.com/llvm/llvm-project/pull/165474
More information about the flang-commits mailing list