[Mlir-commits] [mlir] 828b476 - Revert "[mlir][GPU] Add known_block_size and known_grid_size to gpu.func"

Fri Dec 23 17:30:24 PST 2022

Author: Stella Stamenova
Date: 2022-12-23T17:29:42-08:00
New Revision: 828b4762caf41d657977ed316133ae5e7396298e

URL: https://github.com/llvm/llvm-project/commit/828b4762caf41d657977ed316133ae5e7396298e
DIFF: https://github.com/llvm/llvm-project/commit/828b4762caf41d657977ed316133ae5e7396298e.diff

LOG: Revert "[mlir][GPU] Add known_block_size and known_grid_size to gpu.func"

This reverts commit 85e38d7cd670371206f6067772dc822049d2cbd8.

This broke the windows mlir buildbot:
https://lab.llvm.org/buildbot/#/builders/13/builds/30180/steps/6/logs/stdio

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
    mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
    mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
    mlir/test/Dialect/GPU/int-range-interface.mlir
    mlir/test/Dialect/GPU/invalid.mlir
    mlir/test/Dialect/GPU/outlining.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 44423078ff924..baf9540c8b695 100644

--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -205,14 +205,6 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
     coordinate work items. Declarations of GPU functions, i.e. not having the
     body region, are not supported.
 
-    A function may optionally be annotated with the block and/or grid sizes
-    that will be used when it is launched using the `gpu.known_block_size` and
-    `gpu.known_grid_size` attributes, respectively. If set, these attributes must
-    be arrays of three 32-bit integers giving the x, y, and z launch dimensions.
-    Launching a kernel that has these annotations, or that calls a function with
-    these annotations, using a block size or grid size other than what is specified
-    is undefined behavior.
-
     Syntax:
 
     ```
@@ -319,36 +311,6 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
       return "workgroup_attributions";
     }
 
-    static constexpr StringLiteral getKnownBlockSizeAttrName() {
-      return StringLiteral("gpu.known_block_size");
-    }
-
-    static constexpr StringLiteral getKnownGridSizeAttrName() {
-      return StringLiteral("gpu.known_grid_size");
-    }
-
-    /// Returns the block size this kernel will be launched with along
-    /// dimension `dim` if known. The value of gpu.thread_id dim will be strictly
-    /// less than this size.
-    Optional<uint32_t> getKnownBlockSize(gpu::Dimension dim) {
-      if (auto array =
-        (*this)->getAttrOfType<DenseI32ArrayAttr>(getKnownBlockSizeAttrName())) {
-        return array[static_cast<uint32_t>(dim)];
-      }
-      return std::nullopt;
-    }
-
-    /// Returns the grid size this kernel will be launched with along
-    /// dimension `dim` if known. The value of gpu.block_id dim will be strictly
-    /// less than this size.
-    Optional<uint32_t> getKnownGridSize(gpu::Dimension dim) {
-      if (auto array =
-        (*this)->getAttrOfType<DenseI32ArrayAttr>(getKnownGridSizeAttrName())) {
-        return array[static_cast<uint32_t>(dim)];
-      }
-      return std::nullopt;
-    }
-
     /// Returns the argument types of this function.
     ArrayRef<Type> getArgumentTypes() { return getFunctionType().getInputs(); }
 
@@ -367,8 +329,6 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
     LogicalResult verifyBody();
   }];
   let hasCustomAssemblyFormat = 1;
-
-  let hasVerifier = 1;
 }
 
 def GPU_LaunchFuncOp : GPU_Op<"launch_func",

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index d687043c22f79..e1d92b9eac315 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -16,7 +16,6 @@
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/DialectImplementation.h"
@@ -1058,27 +1057,6 @@ LogicalResult GPUFuncOp::verifyBody() {
   return success();
 }
 
-static LogicalResult verifyKnownLaunchSizeAttr(gpu::GPUFuncOp op,
-                                               StringRef attrName) {
-  auto maybeAttr = op->getAttr(attrName);
-  if (!maybeAttr)
-    return success();
-  auto array = maybeAttr.dyn_cast<DenseI32ArrayAttr>();
-  if (!array)
-    return op.emitOpError(attrName + " must be a dense i32 array");
-  if (array.size() != 3)
-    return op.emitOpError(attrName + " must contain exactly 3 elements");
-  return success();
-}
-
-LogicalResult GPUFuncOp::verify() {
-  if (failed(verifyKnownLaunchSizeAttr(*this, getKnownBlockSizeAttrName())))
-    return failure();
-  if (failed(verifyKnownLaunchSizeAttr(*this, getKnownGridSizeAttrName())))
-    return failure();
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // ReturnOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
index d41823b6b4b2e..3df44a29296ba 100644
--- a/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
+++ b/mlir/lib/Dialect/GPU/IR/InferIntRangeInterfaceImpls.cpp
@@ -7,11 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/IR/Matchers.h"
 #include "mlir/Interfaces/InferIntRangeInterface.h"
-#include "llvm/ADT/STLForwardCompat.h"
-#include "llvm/Support/MathExtras.h"
-#include <optional>
 
 using namespace mlir;
 using namespace mlir::gpu;
@@ -27,107 +23,40 @@ static ConstantIntRanges getIndexRange(uint64_t umin, uint64_t umax) {
                                          APInt(width, umax));
 }
 
-namespace {
-enum class LaunchDims : uint32_t { Block = 0, Grid = 1 };
-} // end namespace
-
-/// If the operation `op` is in a context that is annotated with maximum
-/// launch dimensions (a launch op with constant block or grid
-/// sizes or a launch_func op with the appropriate dimensions), return
-/// the bound on the maximum size of the dimension that the op is querying.
-/// IDs will be one less than this bound.
-
-static Value valueByDim(KernelDim3 dims, Dimension dim) {
-  switch (dim) {
-  case Dimension::x:
-    return dims.x;
-  case Dimension::y:
-    return dims.y;
-  case Dimension::z:
-    return dims.z;
-  }
-}
-
-static uint64_t zext(uint32_t arg) { return static_cast<uint64_t>(arg); }
-
-template <typename Op>
-static Optional<uint64_t> getKnownLaunchDim(Op op, LaunchDims type) {
-  Dimension dim = op.getDimension();
-  if (auto launch = op->template getParentOfType<LaunchOp>()) {
-    KernelDim3 bounds;
-    switch (type) {
-    case LaunchDims::Block:
-      bounds = launch.getBlockSizeOperandValues();
-      break;
-    case LaunchDims::Grid:
-      bounds = launch.getGridSizeOperandValues();
-      break;
-    }
-    Value maybeBound = valueByDim(bounds, dim);
-    APInt value;
-    if (matchPattern(maybeBound, m_ConstantInt(&value)))
-      return value.getZExtValue();
-  }
-
-  if (auto func = op->template getParentOfType<GPUFuncOp>()) {
-    switch (type) {
-    case LaunchDims::Block:
-      return llvm::transformOptional(func.getKnownBlockSize(dim), zext);
-    case LaunchDims::Grid:
-      return llvm::transformOptional(func.getKnownGridSize(dim), zext);
-    }
-  }
-  return std::nullopt;
-}
-
 void BlockDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                    SetIntRangeFn setResultRange) {
-  Optional<uint64_t> knownVal = getKnownLaunchDim(*this, LaunchDims::Block);
-  if (knownVal)
-    setResultRange(getResult(), getIndexRange(*knownVal, *knownVal));
-  else
-    setResultRange(getResult(), getIndexRange(1, kMaxDim));
+  setResultRange(getResult(), getIndexRange(1, kMaxDim));
 }
 
 void BlockIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                   SetIntRangeFn setResultRange) {
-  uint64_t max = getKnownLaunchDim(*this, LaunchDims::Grid).value_or(kMaxDim);
-  setResultRange(getResult(), getIndexRange(0, max - 1ULL));
+  setResultRange(getResult(), getIndexRange(0, kMaxDim - 1));
 }
 
 void GridDimOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                   SetIntRangeFn setResultRange) {
-  Optional<uint64_t> knownVal = getKnownLaunchDim(*this, LaunchDims::Grid);
-  if (knownVal)
-    setResultRange(getResult(), getIndexRange(*knownVal, *knownVal));
-  else
-    setResultRange(getResult(), getIndexRange(1, kMaxDim));
+  setResultRange(getResult(), getIndexRange(1, kMaxDim));
 }
 
 void ThreadIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                    SetIntRangeFn setResultRange) {
-  uint64_t max = getKnownLaunchDim(*this, LaunchDims::Block).value_or(kMaxDim);
-  setResultRange(getResult(), getIndexRange(0, max - 1ULL));
+  setResultRange(getResult(), getIndexRange(0, kMaxDim - 1));
 }
 
 void LaneIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                  SetIntRangeFn setResultRange) {
-  setResultRange(getResult(), getIndexRange(0, kMaxSubgroupSize - 1ULL));
+  setResultRange(getResult(), getIndexRange(0, kMaxSubgroupSize - 1));
 }
 
 void SubgroupIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                      SetIntRangeFn setResultRange) {
-  setResultRange(getResult(), getIndexRange(0, kMaxDim - 1ULL));
+  setResultRange(getResult(), getIndexRange(0, kMaxDim - 1));
 }
 
 void GlobalIdOp::inferResultRanges(ArrayRef<ConstantIntRanges>,
                                    SetIntRangeFn setResultRange) {
-  uint64_t blockDimMax =
-      getKnownLaunchDim(*this, LaunchDims::Block).value_or(kMaxDim);
-  uint64_t gridDimMax =
-      getKnownLaunchDim(*this, LaunchDims::Grid).value_or(kMaxDim);
   setResultRange(getResult(),
-                 getIndexRange(0, (blockDimMax * gridDimMax) - 1ULL));
+                 getIndexRange(0, std::numeric_limits<int64_t>::max()));
 }
 
 void NumSubgroupsOp::inferResultRanges(ArrayRef<ConstantIntRanges>,

diff  --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index e8883ea7c8eb7..fadae79eff85b 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -22,12 +22,10 @@
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/RegionUtils.h"
-#include <limits>
 
 namespace mlir {
 #define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONS
@@ -149,27 +147,8 @@ LogicalResult mlir::sinkOperationsIntoLaunchOp(
   return success();
 }
 
-/// Return the provided KernelDim3 as an array of i32 constants if possible.
-static DenseI32ArrayAttr maybeConstantDimsAttr(gpu::KernelDim3 dims) {
-  SmallVector<int32_t, 3> constants;
-  MLIRContext *ctx = dims.x.getContext();
-  for (Value v : {dims.x, dims.y, dims.z}) {
-    APInt constValue;
-    if (!matchPattern(v, m_ConstantInt(&constValue)))
-      return nullptr;
-    // In the event someone called for a too-large block or grid dimension,
-    // don't set bounds as it is likely to cause more confusing behavior.
-    if (constValue.ugt(std::numeric_limits<uint32_t>::max()))
-      return nullptr;
-    constants.push_back(
-        constValue.getLimitedValue(std::numeric_limits<uint32_t>::max()));
-  }
-  return DenseI32ArrayAttr::get(ctx, constants);
-}
-
 /// Outline the `gpu.launch` operation body into a kernel function. Replace
 /// `gpu.terminator` operations by `gpu.return` in the generated function.
-/// Set block and grid size bounds if known.
 static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
                                             StringRef kernelFnName,
                                             SetVector<Value> &operands) {
@@ -194,19 +173,6 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
   auto outlinedFunc = builder.create<gpu::GPUFuncOp>(loc, kernelFnName, type);
   outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                         builder.getUnitAttr());
-
-  // If we can infer bounds on the grid and/or block sizes from the arguments
-  // to the launch op, propagate them to the generated kernel. This is safe
-  // because multiple launches with the same body are not deduplicated.
-  if (auto blockBounds =
-          maybeConstantDimsAttr(launchOp.getBlockSizeOperandValues()))
-    outlinedFunc->setAttr(gpu::GPUFuncOp::getKnownBlockSizeAttrName(),
-                          blockBounds);
-  if (auto gridBounds =
-          maybeConstantDimsAttr(launchOp.getGridSizeOperandValues()))
-    outlinedFunc->setAttr(gpu::GPUFuncOp::getKnownGridSizeAttrName(),
-                          gridBounds);
-
   BlockAndValueMapping map;
 
   // Map the arguments corresponding to the launch parameters like blockIdx,

diff  --git a/mlir/test/Dialect/GPU/int-range-interface.mlir b/mlir/test/Dialect/GPU/int-range-interface.mlir
index 02aec9dc0476f..2c5af0886e9f5 100644
--- a/mlir/test/Dialect/GPU/int-range-interface.mlir
+++ b/mlir/test/Dialect/GPU/int-range-interface.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-int-range-inference -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -test-int-range-inference %s | FileCheck %s
 
 // CHECK-LABEL: func @launch_func
 func.func @launch_func(%arg0 : index) {
@@ -41,18 +41,12 @@ func.func @launch_func(%arg0 : index) {
     %thread_id_y0 = test.reflect_bounds %thread_id_y
     %thread_id_z0 = test.reflect_bounds %thread_id_z
 
-    // The launch bounds are not constant, and so this can't infer anything
-    // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
-    %thread_id_op = gpu.thread_id y
-    %thread_id_op0 = test.reflect_bounds %thread_id_op
     gpu.terminator
   }
 
   func.return
 }
 
-// -----
-
 // CHECK-LABEL: func @kernel
 module attributes {gpu.container_module} {
   gpu.module @gpu_module {
@@ -106,9 +100,9 @@ module attributes {gpu.container_module} {
       %global_id_y = gpu.global_id y
       %global_id_z = gpu.global_id z
 
-      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = -9223372036854775808 : index, umax = -8589934592 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = -9223372036854775808 : index, umax = -8589934592 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = -9223372036854775808 : index, umax = -8589934592 : index, umin = 0 : index}
+      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = 0 : index, umax = 9223372036854775807 : index, umin = 0 : index}
+      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = 0 : index, umax = 9223372036854775807 : index, umin = 0 : index}
+      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = 0 : index, umax = 9223372036854775807 : index, umin = 0 : index}
       %global_id_x0 = test.reflect_bounds %global_id_x
       %global_id_y0 = test.reflect_bounds %global_id_y
       %global_id_z0 = test.reflect_bounds %global_id_z
@@ -132,86 +126,3 @@ module attributes {gpu.container_module} {
   }
 }
 
-// -----
-
-// CHECK-LABEL: func @annotated_kernel
-module attributes {gpu.container_module} {
-  gpu.module @gpu_module {
-    gpu.func @annotated_kernel() kernel
-      attributes {gpu.known_block_size = array<i32: 8, 12, 16>,
-          gpu.known_grid_size = array<i32: 20, 24, 28>} {
-
-      %grid_dim_x = gpu.grid_dim x
-      %grid_dim_y = gpu.grid_dim y
-      %grid_dim_z = gpu.grid_dim z
-
-      // CHECK: test.reflect_bounds {smax = 20 : index, smin = 20 : index, umax = 20 : index, umin = 20 : index}
-      // CHECK: test.reflect_bounds {smax = 24 : index, smin = 24 : index, umax = 24 : index, umin = 24 : index}
-      // CHECK: test.reflect_bounds {smax = 28 : index, smin = 28 : index, umax = 28 : index, umin = 28 : index}
-      %grid_dim_x0 = test.reflect_bounds %grid_dim_x
-      %grid_dim_y0 = test.reflect_bounds %grid_dim_y
-      %grid_dim_z0 = test.reflect_bounds %grid_dim_z
-
-      %block_id_x = gpu.block_id x
-      %block_id_y = gpu.block_id y
-      %block_id_z = gpu.block_id z
-
-      // CHECK: test.reflect_bounds {smax = 19 : index, smin = 0 : index, umax = 19 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 23 : index, smin = 0 : index, umax = 23 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 27 : index, smin = 0 : index, umax = 27 : index, umin = 0 : index}
-      %block_id_x0 = test.reflect_bounds %block_id_x
-      %block_id_y0 = test.reflect_bounds %block_id_y
-      %block_id_z0 = test.reflect_bounds %block_id_z
-
-      %block_dim_x = gpu.block_dim x
-      %block_dim_y = gpu.block_dim y
-      %block_dim_z = gpu.block_dim z
-
-      // CHECK: test.reflect_bounds {smax = 8 : index, smin = 8 : index, umax = 8 : index, umin = 8 : index}
-      // CHECK: test.reflect_bounds {smax = 12 : index, smin = 12 : index, umax = 12 : index, umin = 12 : index}
-      // CHECK: test.reflect_bounds {smax = 16 : index, smin = 16 : index, umax = 16 : index, umin = 16 : index}
-      %block_dim_x0 = test.reflect_bounds %block_dim_x
-      %block_dim_y0 = test.reflect_bounds %block_dim_y
-      %block_dim_z0 = test.reflect_bounds %block_dim_z
-
-      %thread_id_x = gpu.thread_id x
-      %thread_id_y = gpu.thread_id y
-      %thread_id_z = gpu.thread_id z
-
-      // CHECK: test.reflect_bounds {smax = 7 : index, smin = 0 : index, umax = 7 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 11 : index, smin = 0 : index, umax = 11 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 15 : index, smin = 0 : index, umax = 15 : index, umin = 0 : index}
-      %thread_id_x0 = test.reflect_bounds %thread_id_x
-      %thread_id_y0 = test.reflect_bounds %thread_id_y
-      %thread_id_z0 = test.reflect_bounds %thread_id_z
-
-      %global_id_x = gpu.global_id x
-      %global_id_y = gpu.global_id y
-      %global_id_z = gpu.global_id z
-
-      // CHECK: test.reflect_bounds {smax = 159 : index, smin = 0 : index, umax = 159 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 287 : index, smin = 0 : index, umax = 287 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 447 : index, smin = 0 : index, umax = 447 : index, umin = 0 : index}
-      %global_id_x0 = test.reflect_bounds %global_id_x
-      %global_id_y0 = test.reflect_bounds %global_id_y
-      %global_id_z0 = test.reflect_bounds %global_id_z
-
-      %subgroup_size = gpu.subgroup_size : index
-      %lane_id = gpu.lane_id
-      %num_subgroups = gpu.num_subgroups : index
-      %subgroup_id = gpu.subgroup_id : index
-
-      // CHECK: test.reflect_bounds {smax = 128 : index, smin = 1 : index, umax = 128 : index, umin = 1 : index}
-      // CHECK: test.reflect_bounds {smax = 127 : index, smin = 0 : index, umax = 127 : index, umin = 0 : index}
-      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
-      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
-      %subgroup_size0 = test.reflect_bounds %subgroup_size
-      %lane_id0 = test.reflect_bounds %lane_id
-      %num_subgroups0 = test.reflect_bounds %num_subgroups
-      %subgroup_id0 = test.reflect_bounds %subgroup_id
-
-      gpu.return
-    }
-  }
-}
-

diff  --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 76a14d353bc4f..7a11acbc2d239 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -599,25 +599,3 @@ func.func @alloc() {
    %1 = gpu.alloc(%0) : memref<2x?x?xf32, 1>
    return
 }
-
-// -----
-
-module attributes {gpu.container_module} {
-  gpu.module @kernel {
-    // expected-error at +1 {{'gpu.func' op gpu.known_block_size must be a dense i32 array}}
-    gpu.func @kernel() kernel attributes {gpu.known_block_size = 32 : i32} {
-      gpu.return
-    }
-  }
-}
-
-// -----
-
-module attributes {gpu.container_module} {
-  gpu.module @kernel {
-    // expected-error at +1 {{'gpu.func' op gpu.known_block_size must contain exactly 3 elements}}
-    gpu.func @kernel() kernel attributes {gpu.known_block_size = array<i32: 2, 1>} {
-      gpu.return
-    }
-  }
-}

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 422e0c154dd47..5191dcf8fffb8 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -41,8 +41,6 @@ func.func @launch() {
 // CHECK-LABEL: gpu.module @launch_kernel
 // CHECK-NEXT: gpu.func @launch_kernel
 // CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref<?xf32, 1>)
-// CHECK-SAME: gpu.known_block_size = array<i32: 20, 24, 28>
-// CHECK-SAME: gpu.known_grid_size = array<i32: 8, 12, 16>
 // CHECK-NEXT: %[[BID:.*]] = gpu.block_id x
 // CHECK-NEXT: = gpu.block_id y
 // CHECK-NEXT: = gpu.block_id z
@@ -293,20 +291,3 @@ func.func @recursive_device_function() {
 // CHECK:   func @device_function()
 // CHECK:   func @recursive_device_function()
 // CHECK-NOT:   func @device_function
-
-// -----
-
-// CHECK-LABEL: @non_constant_launches
-func.func @non_constant_launches(%arg0 : index) {
-  // CHECK-NOT: gpu.known_block_size
-  // CHECK-NOT: gpu.known_grid_size
-  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %arg0, %grid_y = %arg0,
-                                       %grid_z = %arg0)
-             threads(%tx, %ty, %tz) in (%block_x = %arg0, %block_y = %arg0,
-                                        %block_z = %arg0) {
-    gpu.terminator
-  }
-  return
-}
-
-// CHECK-DL-LABEL: gpu.module @non_constant_launches_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}