[Mlir-commits] [mlir] 7085cb6 - [mlir][NvGpuToNVVM] Fix byte size calculation in async copy lowering
Christopher Bate
llvmlistbot at llvm.org
Mon May 23 09:56:30 PDT 2022
Author: Christopher Bate
Date: 2022-05-23T10:53:51-06:00
New Revision: 7085cb6011d4593f39c6c3369d1e29ff08edc514
URL: https://github.com/llvm/llvm-project/commit/7085cb6011d4593f39c6c3369d1e29ff08edc514
DIFF: https://github.com/llvm/llvm-project/commit/7085cb6011d4593f39c6c3369d1e29ff08edc514.diff
LOG: [mlir][NvGpuToNVVM] Fix byte size calculation in async copy lowering
AsyncCopyOp lowering converted "size in elements" to "size in bytes"
assuming the element type size is at least one byte. This removes
that restriction, allowing for types such as i4 and b1 to be handled
correctly.
Differential Revision: https://reviews.llvm.org/D125838
Added:
Modified:
mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index ccf85915e49fa..7ee7dce361f30 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -381,7 +381,7 @@ struct NVGPUAsyncCopyLowering
scrPtr);
int64_t numElements = adaptor.numElements().getZExtValue();
int64_t sizeInBytes =
- (dstMemrefType.getElementTypeBitWidth() / 8) * numElements;
+ (dstMemrefType.getElementTypeBitWidth() * numElements) / 8;
// bypass L1 is only supported for byte sizes of 16, we drop the hint
// otherwise.
UnitAttr bypassL1 = sizeInBytes == 16 ? adaptor.bypassL1Attr() : UnitAttr();
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 7bd02b7413117..8a8d6d5bca06d 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -267,3 +267,28 @@ func.func @async_cp(
return
}
+// -----
+
+// CHECK-LABEL: @async_cp_i4(
+// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
+func.func @async_cp_i4(
+ %src: memref<128x64xi4>, %dst: memref<128x128xi4, 3>, %i : index) -> !nvgpu.device.async.token {
+ // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
+ // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4, 3>, ptr<i4, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(128 : index) : i64
+ // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
+ // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[IDX1]] : i64
+ // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI1]]] : (!llvm.ptr<i4, 3>, i64) -> !llvm.ptr<i4, 3>
+ // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<i4, 3> to !llvm.ptr<i8, 3>
+ // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4>, ptr<i4>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK-DAG: %[[S2:.*]] = llvm.mlir.constant(64 : index) : i64
+ // CHECK-DAG: %[[FI2:.*]] = llvm.mul %[[IDX1]], %[[S2]] : i64
+ // CHECK-DAG: %[[FI3:.*]] = llvm.add %[[FI2]], %[[IDX1]] : i64
+ // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI3]]] : (!llvm.ptr<i4>, i64) -> !llvm.ptr<i4>
+ // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr<i4> to !llvm.ptr<i8>
+ // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr<i8> to !llvm.ptr<i8, 1>
+ // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16
+ %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i], 32 : memref<128x64xi4> to memref<128x128xi4, 3>
+ return %0 : !nvgpu.device.async.token
+}
+
More information about the Mlir-commits
mailing list