[Mlir-commits] [mlir] 2dd7a9c - [MLIR] NFC: Rename mcuMemHostRegister* to mgpuMemHostRegister* to make it consistent with the other cuda-runner functions and ROCm.

Mon Jul 27 06:48:16 PDT 2020

Author: Christian Sigg
Date: 2020-07-27T15:48:05+02:00
New Revision: 2dd7a9cc2d0572c3d1e5b9ce554a0800079863c9

URL: https://github.com/llvm/llvm-project/commit/2dd7a9cc2d0572c3d1e5b9ce554a0800079863c9
DIFF: https://github.com/llvm/llvm-project/commit/2dd7a9cc2d0572c3d1e5b9ce554a0800079863c9.diff

LOG: [MLIR] NFC: Rename mcuMemHostRegister* to mgpuMemHostRegister* to make it consistent with the other cuda-runner functions and ROCm.

Summary: Rename mcuMemHostRegister* to mgpuMemHostRegister*.

Reviewers: herhut

Reviewed By: herhut

Subscribers: yaxunl, mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, aartbik, liufengdb, stephenneuendorffer, Joonsoo, grosul1, Kayjukh, jurahul, msifontes

Tags: #mlir

Differential Revision: https://reviews.llvm.org/D84583

Added: 
    

Modified: 
    mlir/test/mlir-cuda-runner/all-reduce-and.mlir
    mlir/test/mlir-cuda-runner/all-reduce-max.mlir
    mlir/test/mlir-cuda-runner/all-reduce-min.mlir
    mlir/test/mlir-cuda-runner/all-reduce-op.mlir
    mlir/test/mlir-cuda-runner/all-reduce-or.mlir
    mlir/test/mlir-cuda-runner/all-reduce-region.mlir
    mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
    mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
    mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
    mlir/test/mlir-cuda-runner/shuffle.mlir
    mlir/test/mlir-cuda-runner/two-modules.mlir
    mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir b/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
index d3ad7a802537..f89f91415724 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,6 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterInt32(%ptr : memref<*xi32>)
+func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 

diff  --git a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir b/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
index ae2f6c3d6b3e..4adf8a73d924 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,6 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterInt32(%ptr : memref<*xi32>)
+func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 

diff  --git a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir b/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
index 0cd4f11daf10..8cb3116e9d0d 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,6 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterInt32(%ptr : memref<*xi32>)
+func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 

diff  --git a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir b/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
index 67c4f96d36f4..72306674c3ff 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
@@ -11,7 +11,7 @@ func @main() {
   %sy = dim %dst, %c1 : memref<?x?x?xf32>
   %sz = dim %dst, %c0 : memref<?x?x?xf32>
   %cast_dst = memref_cast %dst : memref<?x?x?xf32> to memref<*xf32>
-  call @mcuMemHostRegisterFloat(%cast_dst) : (memref<*xf32>) -> ()
+  call @mgpuMemHostRegisterFloat(%cast_dst) : (memref<*xf32>) -> ()
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %sy, %block_z = %sz) {
     %t0 = muli %tz, %block_y : index
@@ -28,5 +28,5 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterFloat(%ptr : memref<*xf32>)
+func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)

diff  --git a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir b/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
index cc9eae9e8b66..7d0ed929322e 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,6 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterInt32(%ptr : memref<*xi32>)
+func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 

diff  --git a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir b/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
index afd3d7cb038a..a9426c658978 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
@@ -8,7 +8,7 @@ func @main() {
   %c0 = constant 0 : index
   %sx = dim %dst, %c0 : memref<?xf32>
   %cast_dst = memref_cast %dst : memref<?xf32> to memref<*xf32>
-  call @mcuMemHostRegisterFloat(%cast_dst) : (memref<*xf32>) -> ()
+  call @mgpuMemHostRegisterFloat(%cast_dst) : (memref<*xf32>) -> ()
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
     %val = index_cast %tx : index to i32
@@ -25,5 +25,5 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterFloat(%ptr : memref<*xf32>)
+func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(memref<*xf32>)

diff  --git a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir b/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
index a32c4d3eb93e..67461783b257 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
@@ -25,9 +25,9 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_data) : (memref<*xi32>) -> ()
   %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_sum) : (memref<*xi32>) -> ()
 
   store %cst0, %data[%c0, %c0] : memref<2x6xi32>
   store %cst1, %data[%c0, %c1] : memref<2x6xi32>
@@ -58,6 +58,6 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterInt32(%ptr : memref<*xi32>)
+func @mgpuMemHostRegisterInt32(%ptr : memref<*xi32>)
 func @print_memref_i32(memref<*xi32>)
 

diff  --git a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir b/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
index 0ef33ea6112a..80339c36fb38 100644
--- a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
+++ b/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
@@ -18,7 +18,7 @@ func @main() {
   %21 = constant 5 : i32
   %22 = memref_cast %arg0 : memref<5xf32> to memref<?xf32>
   %23 = memref_cast %22 : memref<?xf32> to memref<*xf32>
-  call @mcuMemHostRegisterFloat(%23) : (memref<*xf32>) -> ()
+  call @mgpuMemHostRegisterFloat(%23) : (memref<*xf32>) -> ()
   call @print_memref_f32(%23) : (memref<*xf32>) -> ()
   %24 = constant 1.0 : f32
   call @other_func(%24, %22) : (f32, memref<?xf32>) -> ()
@@ -26,5 +26,5 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterFloat(%ptr : memref<*xf32>)
+func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)

diff  --git a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir b/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
index a7b143f760a7..b88d8e1b8ba1 100644
--- a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
+++ b/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
@@ -26,11 +26,11 @@ func @main() {
   %c6 = constant 6 : index
 
   %cast_data = memref_cast %data : memref<2x6xf32> to memref<*xf32>
-  call @mcuMemHostRegisterFloat(%cast_data) : (memref<*xf32>) -> ()
+  call @mgpuMemHostRegisterFloat(%cast_data) : (memref<*xf32>) -> ()
   %cast_sum = memref_cast %sum : memref<2xf32> to memref<*xf32>
-  call @mcuMemHostRegisterFloat(%cast_sum) : (memref<*xf32>) -> ()
+  call @mgpuMemHostRegisterFloat(%cast_sum) : (memref<*xf32>) -> ()
   %cast_mul = memref_cast %mul : memref<2xf32> to memref<*xf32>
-  call @mcuMemHostRegisterFloat(%cast_mul) : (memref<*xf32>) -> ()
+  call @mgpuMemHostRegisterFloat(%cast_mul) : (memref<*xf32>) -> ()
 
   store %cst0, %data[%c0, %c0] : memref<2x6xf32>
   store %cst1, %data[%c0, %c1] : memref<2x6xf32>
@@ -66,5 +66,5 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterFloat(%ptr : memref<*xf32>)
+func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(memref<*xf32>)

diff  --git a/mlir/test/mlir-cuda-runner/shuffle.mlir b/mlir/test/mlir-cuda-runner/shuffle.mlir
index 0f8cdca3a8eb..a4563cc0c381 100644
--- a/mlir/test/mlir-cuda-runner/shuffle.mlir
+++ b/mlir/test/mlir-cuda-runner/shuffle.mlir
@@ -8,7 +8,7 @@ func @main() {
   %c0 = constant 0 : index
   %sx = dim %dst, %c0 : memref<?xf32>
   %cast_dest = memref_cast %dst : memref<?xf32> to memref<*xf32>
-  call @mcuMemHostRegisterFloat(%cast_dest) : (memref<*xf32>) -> ()
+  call @mgpuMemHostRegisterFloat(%cast_dest) : (memref<*xf32>) -> ()
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
     %t0 = index_cast %tx : index to i32
@@ -28,5 +28,5 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterFloat(%ptr : memref<*xf32>)
+func @mgpuMemHostRegisterFloat(%ptr : memref<*xf32>)
 func @print_memref_f32(%ptr : memref<*xf32>)

diff  --git a/mlir/test/mlir-cuda-runner/two-modules.mlir b/mlir/test/mlir-cuda-runner/two-modules.mlir
index 3229879d2fb5..ef4dd0c48b8d 100644
--- a/mlir/test/mlir-cuda-runner/two-modules.mlir
+++ b/mlir/test/mlir-cuda-runner/two-modules.mlir
@@ -8,7 +8,7 @@ func @main() {
   %c0 = constant 0 : index
   %sx = dim %dst, %c0 : memref<?xi32>
   %cast_dst = memref_cast %dst : memref<?xi32> to memref<*xi32>
-  call @mcuMemHostRegisterInt32(%cast_dst) : (memref<*xi32>) -> ()
+  call @mgpuMemHostRegisterInt32(%cast_dst) : (memref<*xi32>) -> ()
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
              threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
     %t0 = index_cast %tx : index to i32
@@ -25,5 +25,5 @@ func @main() {
   return
 }
 
-func @mcuMemHostRegisterInt32(%memref : memref<*xi32>)
+func @mgpuMemHostRegisterInt32(%memref : memref<*xi32>)
 func @print_memref_i32(%memref : memref<*xi32>)

diff  --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
index 705fa9f00930..2b71eb34703b 100644
--- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
@@ -83,7 +83,7 @@ extern "C" void mgpuMemHostRegister(void *ptr, uint64_t sizeBytes) {
 // Allows to register a MemRef with the CUDA runtime. Initializes array with
 // value. Helpful until we have transfer functions implemented.
 template <typename T>
-void mcuMemHostRegisterMemRef(const DynamicMemRefType<T> &mem_ref, T value) {
+void mgpuMemHostRegisterMemRef(const DynamicMemRefType<T> &mem_ref, T value) {
   llvm::SmallVector<int64_t, 4> denseStrides(mem_ref.rank);
   llvm::ArrayRef<int64_t> sizes(mem_ref.sizes, mem_ref.rank);
   llvm::ArrayRef<int64_t> strides(mem_ref.strides, mem_ref.rank);
@@ -103,12 +103,12 @@ void mcuMemHostRegisterMemRef(const DynamicMemRefType<T> &mem_ref, T value) {
   mgpuMemHostRegister(pointer, count * sizeof(T));
 }
 
-extern "C" void mcuMemHostRegisterFloat(int64_t rank, void *ptr) {
+extern "C" void mgpuMemHostRegisterFloat(int64_t rank, void *ptr) {
   UnrankedMemRefType<float> mem_ref = {rank, ptr};
-  mcuMemHostRegisterMemRef(DynamicMemRefType<float>(mem_ref), 1.23f);
+  mgpuMemHostRegisterMemRef(DynamicMemRefType<float>(mem_ref), 1.23f);
 }
 
-extern "C" void mcuMemHostRegisterInt32(int64_t rank, void *ptr) {
+extern "C" void mgpuMemHostRegisterInt32(int64_t rank, void *ptr) {
   UnrankedMemRefType<int32_t> mem_ref = {rank, ptr};
-  mcuMemHostRegisterMemRef(DynamicMemRefType<int32_t>(mem_ref), 123);
+  mgpuMemHostRegisterMemRef(DynamicMemRefType<int32_t>(mem_ref), 123);
 }