[Mlir-commits] [mlir] [mlir][llvm] Implement ConstantLike for ZeroOp, UndefOp, PoisonOp (PR #93690)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed May 29 07:20:36 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-gpu
@llvm/pr-subscribers-mlir-llvm
Author: Guy David (guy-david)
<details>
<summary>Changes</summary>
These act as constants and should be propagated whenever possible. It is safe to do so for mlir.undef and mlir.poison because they remain "dirty" through out their lifetime and can be duplicated, merged, etc. per the LangRef.
---
Patch is 28.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93690.diff
8 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td (+24)
- (modified) mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td (+6-3)
- (modified) mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp (+36-2)
- (modified) mlir/test/Dialect/GPU/dynamic-shared-memory.mlir (+63-63)
- (modified) mlir/test/Dialect/LLVMIR/constant-folding.mlir (+68)
- (modified) mlir/test/Dialect/SparseTensor/conversion.mlir (+1-1)
- (modified) mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir (+2-2)
- (modified) mlir/test/Dialect/SparseTensor/specifier_to_llvm.mlir (+6-6)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index 535cf8dfd2ced..9ac8fd2dba9a7 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -1037,4 +1037,28 @@ def LLVM_TargetFeaturesAttr : LLVM_Attr<"TargetFeatures", "target_features">
let genVerifyDecl = 1;
}
+//===----------------------------------------------------------------------===//
+// UndefAttr
+//===----------------------------------------------------------------------===//
+
+/// Folded into from LLVM::UndefOp.
+def LLVM_UndefAttr : LLVM_Attr<"Undef", "undef"> {
+}
+
+//===----------------------------------------------------------------------===//
+// PoisonAttr
+//===----------------------------------------------------------------------===//
+
+/// Folded into from LLVM::PoisonOp.
+def LLVM_PoisonAttr : LLVM_Attr<"Poison", "poison"> {
+}
+
+//===----------------------------------------------------------------------===//
+// ZeroAttr
+//===----------------------------------------------------------------------===//
+
+/// Folded into from LLVM::ZeroOp.
+def LLVM_ZeroAttr : LLVM_Attr<"Zero", "zero"> {
+}
+
#endif // LLVMIR_ATTRDEFS
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 84e67d2c11dbd..f6f907f39a4b4 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1522,7 +1522,7 @@ def LLVM_NoneTokenOp
let assemblyFormat = "attr-dict `:` type($res)";
}
-def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>,
+def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure, ConstantLike]>,
LLVM_Builder<"$res = llvm::UndefValue::get($_resultType);"> {
let summary = "Creates an undefined value of LLVM dialect type.";
let description = [{
@@ -1541,9 +1541,10 @@ def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>,
let results = (outs LLVM_Type:$res);
let builders = [LLVM_OneResultOpBuilder];
let assemblyFormat = "attr-dict `:` type($res)";
+ let hasFolder = 1;
}
-def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure]>,
+def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure, ConstantLike]>,
LLVM_Builder<"$res = llvm::PoisonValue::get($_resultType);"> {
let summary = "Creates a poison value of LLVM dialect type.";
let description = [{
@@ -1563,10 +1564,11 @@ def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure]>,
let results = (outs LLVM_Type:$res);
let builders = [LLVM_OneResultOpBuilder];
let assemblyFormat = "attr-dict `:` type($res)";
+ let hasFolder = 1;
}
def LLVM_ZeroOp
- : LLVM_Op<"mlir.zero", [Pure]>,
+ : LLVM_Op<"mlir.zero", [Pure, ConstantLike]>,
LLVM_Builder<"$res = llvm::Constant::getNullValue($_resultType);">
{
let summary = "Creates a zero-initialized value of LLVM dialect type.";
@@ -1588,6 +1590,7 @@ def LLVM_ZeroOp
let builders = [LLVM_OneResultOpBuilder];
let assemblyFormat = "attr-dict `:` type($res)";
let hasVerifier = 1;
+ let hasFolder = 1;
}
def LLVM_ConstantOp
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index dcf3f3b52a606..d91f57f9247ab 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -2555,6 +2555,24 @@ Region *LLVMFuncOp::getCallableRegion() {
return &getBody();
}
+//===----------------------------------------------------------------------===//
+// UndefOp.
+//===----------------------------------------------------------------------===//
+
+/// Fold an undef operation to a dedicated undef attribute.
+OpFoldResult LLVM::UndefOp::fold(FoldAdaptor) {
+ return mlir::LLVM::UndefAttr::get(getContext());
+}
+
+//===----------------------------------------------------------------------===//
+// PoisonOp.
+//===----------------------------------------------------------------------===//
+
+/// Fold a poison operation to a dedicated poison attribute.
+OpFoldResult LLVM::PoisonOp::fold(FoldAdaptor) {
+ return mlir::LLVM::PoisonAttr::get(getContext());
+}
+
//===----------------------------------------------------------------------===//
// ZeroOp.
//===----------------------------------------------------------------------===//
@@ -2568,6 +2586,15 @@ LogicalResult LLVM::ZeroOp::verify() {
return success();
}
+/// Fold a zero operation to a builtin zero attribute when possible and fall
+/// back to a dedicated zero attribute.
+OpFoldResult LLVM::ZeroOp::fold(FoldAdaptor) {
+ OpFoldResult result = Builder(getContext()).getZeroAttr(getType());
+ if (result)
+ return result;
+ return mlir::LLVM::ZeroAttr::get(getContext());
+}
+
//===----------------------------------------------------------------------===//
// ConstantOp.
//===----------------------------------------------------------------------===//
@@ -3271,11 +3298,18 @@ LogicalResult LLVMDialect::verifyRegionResultAttribute(Operation *op,
Operation *LLVMDialect::materializeConstant(OpBuilder &builder, Attribute value,
Type type, Location loc) {
- // If this was folded from an llvm.mlir.addressof operation, it should be
- // materialized as such.
+ // If this was folded from an operation other than llvm.mlir.constant, it
+ // should be materialized as such. Note that an llvm.mlir.zero may fold into
+ // a builtin zero attribute and thus will materialize as a llvm.mlir.constant.
if (auto symbol = dyn_cast<FlatSymbolRefAttr>(value))
if (isa<LLVM::LLVMPointerType>(type))
return builder.create<LLVM::AddressOfOp>(loc, type, symbol);
+ if (isa<LLVM::UndefAttr>(value))
+ return builder.create<LLVM::UndefOp>(loc, type);
+ if (isa<LLVM::PoisonAttr>(value))
+ return builder.create<LLVM::PoisonOp>(loc, type);
+ if (isa<LLVM::ZeroAttr>(value))
+ return builder.create<LLVM::ZeroOp>(loc, type);
// Otherwise try materializing it as a regular llvm.mlir.constant op.
return LLVM::ConstantOp::materialize(builder, value, type, loc);
}
diff --git a/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir b/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir
index fb45faaa712f7..d73125fd763e6 100644
--- a/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir
+++ b/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir
@@ -3,11 +3,11 @@
gpu.module @modules {
// CHECK: llvm.mlir.global internal @__dynamic_shmem__3() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8>
- llvm.mlir.global internal @__dynamic_shmem__1() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8>
- llvm.mlir.global internal @__dynamic_shmem__2() {alignment = 16 : i64} : !llvm.array<0 x i8>
+ llvm.mlir.global internal @__dynamic_shmem__1() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8>
+ llvm.mlir.global internal @__dynamic_shmem__2() {alignment = 16 : i64} : !llvm.array<0 x i8>
// CHECK-LABEL: llvm.func @dynamic_shared_memory_kernel(
// CHECK-SAME: %[[arg0:.+]]: i64)
- gpu.func @dynamic_shared_memory_kernel(%d : index) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 1, 1, 1>} {
+ gpu.func @dynamic_shared_memory_kernel(%d : index) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 1, 1, 1>} {
%c1 = arith.constant 1 : index
%c8192 = arith.constant 8192 : index
%c16384 = arith.constant 16384 : index
@@ -19,83 +19,83 @@ gpu.module @modules {
%1 = memref.view %shmem[%c16384][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
"test.use.shared.memory"(%1) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
-
-// CHECK: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
-// CHECK: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
-// CHECK: %[[S2:.+]] = llvm.mlir.constant(1 : index) : i64
-// CHECK: %[[S3:.+]] = llvm.mlir.constant(0 : index) : i64
-// CHECK: %[[S4:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
-// CHECK: %[[S5:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S4]], %[[S5]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S4]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
-// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S3]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S2]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
-// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
-// CHECK: %[[S15:.+]] = llvm.getelementptr %4[16384] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
-// CHECK: %[[S16:.+]] = llvm.insertvalue %[[S15]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S17:.+]] = llvm.insertvalue %[[S3]], %[[S16]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S18:.+]] = llvm.insertvalue %[[S1]], %[[S17]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S19:.+]] = llvm.insertvalue %[[S2]], %[[S18]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S20:.+]] = llvm.insertvalue %[[S0]], %[[S19]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S21:.+]] = llvm.insertvalue %[[S1]], %[[S20]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S22:.+]] = builtin.unrealized_conversion_cast %[[S21]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
-// CHECK: "test.use.shared.memory"(%[[S22]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
+
+// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
+// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
+// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
+// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
+// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
+// CHECK: %[[S15:.+]] = llvm.getelementptr %[[S5]][16384] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
+// CHECK: %[[S16:.+]] = llvm.insertvalue %[[S15]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S17:.+]] = llvm.insertvalue %[[S4]], %[[S16]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S18:.+]] = llvm.insertvalue %[[S1]], %[[S17]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S19:.+]] = llvm.insertvalue %[[S3]], %[[S18]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S20:.+]] = llvm.insertvalue %[[S0]], %[[S19]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S21:.+]] = llvm.insertvalue %[[S1]], %[[S20]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S22:.+]] = builtin.unrealized_conversion_cast %[[S21]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
+// CHECK: "test.use.shared.memory"(%[[S22]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
gpu.return
}
// CHECK-LABEL: llvm.func @gpu_device_function
- gpu.func @gpu_device_function() {
+ gpu.func @gpu_device_function() {
%c8192 = arith.constant 8192 : index
%shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
%0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
"test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
-// CHECK: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
-// CHECK: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
-// CHECK: %[[S2:.+]] = llvm.mlir.constant(1 : index) : i64
-// CHECK: %[[S3:.+]] = llvm.mlir.constant(0 : index) : i64
-// CHECK: %[[S4:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
-// CHECK: %[[S5:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S4]], %[[S5]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S4]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
-// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S3]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S2]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
-// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
+// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
+// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
+// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
+// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
+// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
gpu.return
}
// CHECK-LABEL: llvm.func @func_device_function
- func.func @func_device_function() {
+ func.func @func_device_function() {
%c8192 = arith.constant 8192 : index
%shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
%0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
"test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
-// CHECK: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
-// CHECK: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
-// CHECK: %[[S2:.+]] = llvm.mlir.constant(1 : index) : i64
-// CHECK: %[[S3:.+]] = llvm.mlir.constant(0 : index) : i64
-// CHECK: %[[S4:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
-// CHECK: %[[S5:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S4]], %[[S5]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S4]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
-// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S3]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S2]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
-// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
+// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
+// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(p...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/93690
More information about the Mlir-commits
mailing list