[flang-commits] [flang] [flang][cuda] Add interfaces and lowering for tma_bulk_load (PR #165474)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Tue Oct 28 14:07:26 PDT 2025


================
@@ -2067,6 +2067,67 @@ attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes)
     end subroutine
   end interface
 
+  ! Load specific types, count is in elements
+  ! -----------------------------------------
+  interface tma_bulk_load
+    ! integer(4) specific of the tma_bulk_load generic.
+    !   barrier : integer(8) scalar declared with the shared attribute.
+    !   src     : assumed-size integer(4) array with the device attribute.
+    !   dst     : assumed-size integer(4) array with the shared attribute.
+    !   nelems  : integer(4) passed by value; the count is in elements.
+    ! NOTE(review): presumably transfers nelems elements from src to dst --
+    ! confirm against the lowering.
+    attributes(device) subroutine tma_bulk_ldi4(barrier, src, dst, nelems)
+      ! Rank (r) of src and dst is ignored when matching actual arguments.
+      !dir$ ignore_tkr (r) src, (r) dst
+      integer(8), shared :: barrier
+      integer(4), device :: src(*)
+      integer(4), shared :: dst(*)
+      integer(4), value :: nelems
+    end subroutine
+
+    ! integer(8) specific of the tma_bulk_load generic.
+    !   barrier : integer(8) scalar declared with the shared attribute.
+    !   src     : assumed-size integer(8) array with the device attribute.
+    !   dst     : assumed-size integer(8) array with the shared attribute.
+    !   nelems  : integer(4) passed by value; the count is in elements.
+    ! NOTE(review): presumably transfers nelems elements from src to dst --
+    ! confirm against the lowering.
+    attributes(device) subroutine tma_bulk_ldi8(barrier, src, dst, nelems)
+      ! Rank (r) of src and dst is ignored when matching actual arguments.
+      !dir$ ignore_tkr (r) src, (r) dst
+      integer(8), shared :: barrier
+      integer(8), device :: src(*)
+      integer(8), shared :: dst(*)
+      integer(4), value :: nelems
+    end subroutine
+
+    ! real(2) specific of the tma_bulk_load generic.
+    !   barrier : integer(8) scalar declared with the shared attribute.
+    !   src     : assumed-size real(2) array with the device attribute.
+    !   dst     : assumed-size real(2) array with the shared attribute.
+    !   nelems  : integer(4) passed by value; the count is in elements.
+    ! NOTE(review): presumably transfers nelems elements from src to dst --
+    ! confirm against the lowering.
+    attributes(device) subroutine tma_bulk_ldr2(barrier, src, dst, nelems)
+      ! Rank (r) of src and dst is ignored when matching actual arguments.
+      !dir$ ignore_tkr (r) src, (r) dst
+      integer(8), shared :: barrier
+      real(2), device :: src(*)
+      real(2), shared :: dst(*)
+      integer(4), value :: nelems
+    end subroutine
+
+    ! real(4) specific of the tma_bulk_load generic.
+    !   barrier : integer(8) scalar declared with the shared attribute.
+    !   src     : assumed-size real(4) array with the device attribute.
+    !   dst     : assumed-size real(4) array with the shared attribute.
+    !   nelems  : integer(4) passed by value; the count is in elements.
+    ! NOTE(review): presumably transfers nelems elements from src to dst --
+    ! confirm against the lowering.
+    attributes(device) subroutine tma_bulk_ldr4(barrier, src, dst, nelems)
+      ! Rank (r) of src and dst is ignored when matching actual arguments.
+      !dir$ ignore_tkr (r) src, (r) dst
+      integer(8), shared :: barrier
+      real(4), device :: src(*)
+      real(4), shared :: dst(*)
+      integer(4), value :: nelems
+    end subroutine
+
+    ! real(8) specific of the tma_bulk_load generic.
+    !   barrier : integer(8) scalar declared with the shared attribute.
+    !   src     : assumed-size real(8) array with the device attribute.
+    !   dst     : assumed-size real(8) array with the shared attribute.
+    !   nelems  : integer(4) passed by value; the count is in elements.
+    ! NOTE(review): presumably transfers nelems elements from src to dst --
+    ! confirm against the lowering.
+    attributes(device) subroutine tma_bulk_ldr8(barrier, src, dst, nelems)
+      ! Rank (r) of src and dst is ignored when matching actual arguments.
+      !dir$ ignore_tkr (r) src, (r) dst
+      integer(8), shared :: barrier
+      real(8), device :: src(*)
+      real(8), shared :: dst(*)
+      integer(4), value :: nelems
+    end subroutine
+
+    ! complex(4) specific of the tma_bulk_load generic.
+    !   barrier : integer(8) scalar declared with the shared attribute.
+    !   src     : assumed-size complex(4) array with the device attribute.
+    !   dst     : assumed-size complex(4) array with the shared attribute.
+    !   nelems  : integer(4) passed by value; the count is in elements.
+    ! NOTE(review): presumably transfers nelems elements from src to dst --
+    ! confirm against the lowering.
+    attributes(device) subroutine tma_bulk_ldc4(barrier, src, dst, nelems)
+      ! Rank (r) of src and dst is ignored when matching actual arguments.
+      !dir$ ignore_tkr (r) src, (r) dst
+      integer(8), shared :: barrier
+      complex(4), device :: src(*)
+      complex(4), shared :: dst(*)
+      integer(4), value :: nelems
+    end subroutine
+
+    ! complex(8) specific of the tma_bulk_load generic.
+    !   barrier : integer(8) scalar declared with the shared attribute.
+    !   src     : assumed-size complex(8) array with the device attribute.
+    !   dst     : assumed-size complex(8) array with the shared attribute.
+    !   nelems  : integer(4) passed by value; the count is in elements.
+    ! NOTE(review): presumably transfers nelems elements from src to dst --
+    ! confirm against the lowering.
+    attributes(device) subroutine tma_bulk_ldc8(barrier, src, dst, nelems)
+      ! Rank (r) of src and dst is ignored when matching actual arguments.
+      !dir$ ignore_tkr (r) src, (r) dst
+      integer(8), shared :: barrier
+      complex(8), device :: src(*)
+      complex(8), shared :: dst(*)
+      integer(4), value :: nelems
+    end subroutine
----------------
clementval wrote:

```suggestion
    ! complex(4) specific of the tma_bulk_load generic.
    !   barrier : integer(8) scalar declared with the shared attribute.
    !   src     : assumed-size complex(4) array with the device attribute.
    !   dst     : assumed-size complex(4) array with the shared attribute.
    !   nelems  : integer(4) passed by value; the count is in elements.
    ! NOTE(review): presumably transfers nelems elements from src to dst --
    ! confirm against the lowering.
    attributes(device) subroutine tma_bulk_ldc4(barrier, src, dst, nelems)
      ! Rank (r) of src and dst is ignored when matching actual arguments.
      !dir$ ignore_tkr (r) src, (r) dst
      integer(8), shared :: barrier
      complex(4), device :: src(*)
      complex(4), shared :: dst(*)
      integer(4), value :: nelems
    end subroutine

    ! complex(8) specific of the tma_bulk_load generic.
    !   barrier : integer(8) scalar declared with the shared attribute.
    !   src     : assumed-size complex(8) array with the device attribute.
    !   dst     : assumed-size complex(8) array with the shared attribute.
    !   nelems  : integer(4) passed by value; the count is in elements.
    ! NOTE(review): presumably transfers nelems elements from src to dst --
    ! confirm against the lowering.
    attributes(device) subroutine tma_bulk_ldc8(barrier, src, dst, nelems)
      ! Rank (r) of src and dst is ignored when matching actual arguments.
      !dir$ ignore_tkr (r) src, (r) dst
      integer(8), shared :: barrier
      complex(8), device :: src(*)
      complex(8), shared :: dst(*)
      integer(4), value :: nelems
    end subroutine
  
    ! integer(4) specific of the tma_bulk_load generic.
    !   barrier : integer(8) scalar declared with the shared attribute.
    !   src     : assumed-size integer(4) array with the device attribute.
    !   dst     : assumed-size integer(4) array with the shared attribute.
    !   nelems  : integer(4) passed by value; the count is in elements.
    ! NOTE(review): presumably transfers nelems elements from src to dst --
    ! confirm against the lowering.
    attributes(device) subroutine tma_bulk_ldi4(barrier, src, dst, nelems)
      ! Rank (r) of src and dst is ignored when matching actual arguments.
      !dir$ ignore_tkr (r) src, (r) dst
      integer(8), shared :: barrier
      integer(4), device :: src(*)
      integer(4), shared :: dst(*)
      integer(4), value :: nelems
    end subroutine

    ! integer(8) specific of the tma_bulk_load generic.
    !   barrier : integer(8) scalar declared with the shared attribute.
    !   src     : assumed-size integer(8) array with the device attribute.
    !   dst     : assumed-size integer(8) array with the shared attribute.
    !   nelems  : integer(4) passed by value; the count is in elements.
    ! NOTE(review): presumably transfers nelems elements from src to dst --
    ! confirm against the lowering.
    attributes(device) subroutine tma_bulk_ldi8(barrier, src, dst, nelems)
      ! Rank (r) of src and dst is ignored when matching actual arguments.
      !dir$ ignore_tkr (r) src, (r) dst
      integer(8), shared :: barrier
      integer(8), device :: src(*)
      integer(8), shared :: dst(*)
      integer(4), value :: nelems
    end subroutine

    ! real(2) specific of the tma_bulk_load generic.
    !   barrier : integer(8) scalar declared with the shared attribute.
    !   src     : assumed-size real(2) array with the device attribute.
    !   dst     : assumed-size real(2) array with the shared attribute.
    !   nelems  : integer(4) passed by value; the count is in elements.
    ! NOTE(review): presumably transfers nelems elements from src to dst --
    ! confirm against the lowering.
    attributes(device) subroutine tma_bulk_ldr2(barrier, src, dst, nelems)
      ! Rank (r) of src and dst is ignored when matching actual arguments.
      !dir$ ignore_tkr (r) src, (r) dst
      integer(8), shared :: barrier
      real(2), device :: src(*)
      real(2), shared :: dst(*)
      integer(4), value :: nelems
    end subroutine

    ! real(4) specific of the tma_bulk_load generic.
    !   barrier : integer(8) scalar declared with the shared attribute.
    !   src     : assumed-size real(4) array with the device attribute.
    !   dst     : assumed-size real(4) array with the shared attribute.
    !   nelems  : integer(4) passed by value; the count is in elements.
    ! NOTE(review): presumably transfers nelems elements from src to dst --
    ! confirm against the lowering.
    attributes(device) subroutine tma_bulk_ldr4(barrier, src, dst, nelems)
      ! Rank (r) of src and dst is ignored when matching actual arguments.
      !dir$ ignore_tkr (r) src, (r) dst
      integer(8), shared :: barrier
      real(4), device :: src(*)
      real(4), shared :: dst(*)
      integer(4), value :: nelems
    end subroutine

    ! real(8) specific of the tma_bulk_load generic.
    !   barrier : integer(8) scalar declared with the shared attribute.
    !   src     : assumed-size real(8) array with the device attribute.
    !   dst     : assumed-size real(8) array with the shared attribute.
    !   nelems  : integer(4) passed by value; the count is in elements.
    ! NOTE(review): presumably transfers nelems elements from src to dst --
    ! confirm against the lowering.
    attributes(device) subroutine tma_bulk_ldr8(barrier, src, dst, nelems)
      ! Rank (r) of src and dst is ignored when matching actual arguments.
      !dir$ ignore_tkr (r) src, (r) dst
      integer(8), shared :: barrier
      real(8), device :: src(*)
      real(8), shared :: dst(*)
      integer(4), value :: nelems
    end subroutine
```

https://github.com/llvm/llvm-project/pull/165474


More information about the flang-commits mailing list