[Mlir-commits] [mlir] [mlir][AMDGPU] Avoid verifier crash in DPPOp on vector operand types (PR #178887)

Sat Feb 7 03:33:40 PST 2026

https://github.com/Ayush3941 updated https://github.com/llvm/llvm-project/pull/178887

>From af7f96587dec42caab0b1be1488ec5e7b214da9d Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Fri, 30 Jan 2026 08:10:50 -0500
Subject: [PATCH 1/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector operand
 types

---
 mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp            |  4 ++--
 .../AMDGPU/dpp-verify-no-assert-on-vectors.mlir     | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir

diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
index 87a813a31608d..ec44310bd0e93 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
@@ -675,11 +675,11 @@ LogicalResult SparseMFMAOp::verify() {
 //===----------------------------------------------------------------------===//
 LogicalResult DPPOp::verify() {
   Type srcType = getSrc().getType();
-  if (srcType.getIntOrFloatBitWidth() > 64) {
+  Type elemType = getElementTypeOrSelf(srcType); 
+  if (elemType.getIntOrFloatBitWidth() > 64) {
     return emitOpError("integer and floating point types larger than 64 bits "
                        "are not supported");
   }
-
   DPPPerm kind = getKind();
   Attribute permArgument = getPermArgument().value_or(Attribute{});
 
diff --git a/mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir b/mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir
new file mode 100644
index 0000000000000..c2d712cc3d004
--- /dev/null
+++ b/mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir
@@ -0,0 +1,13 @@
+// RUN: mlir-opt %s -verify-each
+
+// DPPOp verifier must not assert when src type is a
+// vector (e.g. ARM SME tile vectors).
+
+module {
+  func.func @main() {
+    %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
+    %pop = math.ctpop %tile : vector<[16]x[16]xi8>
+    %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<[16]x[16]xi8>
+    return
+  }
+}

>From ea8534d4db2503014287caec08343cb7d2517aab Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Fri, 30 Jan 2026 08:17:21 -0500
Subject: [PATCH 2/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector operand
 types with fixed format

---
 mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
index ec44310bd0e93..c39ae9fc14831 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
@@ -675,7 +675,7 @@ LogicalResult SparseMFMAOp::verify() {
 //===----------------------------------------------------------------------===//
 LogicalResult DPPOp::verify() {
   Type srcType = getSrc().getType();
-  Type elemType = getElementTypeOrSelf(srcType); 
+  Type elemType = getElementTypeOrSelf(srcType);
   if (elemType.getIntOrFloatBitWidth() > 64) {
     return emitOpError("integer and floating point types larger than 64 bits "
                        "are not supported");

>From 1e867d8fd4f7a61a6ba208ebd6a7fe38aab8b497 Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sat, 31 Jan 2026 05:20:30 -0500
Subject: [PATCH 3/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector operand
 types with fixed format, moving stuff to ODS

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td  | 15 +++++++++++++--
 mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp          |  6 ------
 .../AMDGPU/dpp-verify-no-assert-on-vectors.mlir   | 13 -------------
 mlir/test/Dialect/AMDGPU/ops.mlir                 | 11 +++++++++++
 4 files changed, 24 insertions(+), 21 deletions(-)
 delete mode 100644 mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
index 24e40f40c2031..8240f6180b5c3 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
@@ -33,6 +33,17 @@ def AnyIntegerOrFloat : AnyTypeOf<[AnySignlessInteger, AnyFloat], "Integer or Fl
 def AnyIntegerOrFloatOr1DVector :
   AnyTypeOf<[AnyIntegerOrFloat, FixedVectorOfRankAndType<[1], [AnyIntegerOrFloat]>]>;
 
+// Types with element width up to 64 bits, used to keep dpp operands legal.
+def AMDGPU_IntOrFloatWidthLeq64 : Type<
+  CPred<"([](::mlir::Type t) { return t.isIntOrFloat() && t.getIntOrFloatBitWidth() <= 64; })(::mlir::getElementTypeOrSelf($_self))">,
+  "integer or float with element bitwidth ≤ 64">;
+
+def AMDGPU_IntOrFloatOr1DVectorWidthLeq64 :
+  AnyTypeOf<[
+    AMDGPU_IntOrFloatWidthLeq64,
+    FixedVectorOfRankAndType<[1], [AMDGPU_IntOrFloatWidthLeq64]>
+  ]>;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU Op definitions
 //===----------------------------------------------------------------------===//
@@ -643,8 +654,8 @@ def AMDGPU_RawBufferAtomicUminOp :
 
 def AMDGPU_DPPOp : AMDGPU_Op<"dpp",
     [Pure, SameTypeOperands, AllTypesMatch<["result", "old", "src"]>]>,
-  Arguments<(ins AnyType:$old,
-                 AnyType:$src,
+  Arguments<(ins AMDGPU_IntOrFloatOr1DVectorWidthLeq64:$old,
+                 AMDGPU_IntOrFloatOr1DVectorWidthLeq64:$src,
                  AMDGPU_DPPPermAttr:$kind,
                  OptionalAttr<AnyAttrOf<[I32Attr, ArrayAttr, UnitAttr]>>:$permArgument,
                  DefaultValuedAttr<I32Attr, "0xf">:$row_mask,
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
index c39ae9fc14831..f777817adee8e 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUOps.cpp
@@ -674,12 +674,6 @@ LogicalResult SparseMFMAOp::verify() {
 // DPPOp
 //===----------------------------------------------------------------------===//
 LogicalResult DPPOp::verify() {
-  Type srcType = getSrc().getType();
-  Type elemType = getElementTypeOrSelf(srcType);
-  if (elemType.getIntOrFloatBitWidth() > 64) {
-    return emitOpError("integer and floating point types larger than 64 bits "
-                       "are not supported");
-  }
   DPPPerm kind = getKind();
   Attribute permArgument = getPermArgument().value_or(Attribute{});
 
diff --git a/mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir b/mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir
deleted file mode 100644
index c2d712cc3d004..0000000000000
--- a/mlir/test/Dialect/AMDGPU/dpp-verify-no-assert-on-vectors.mlir
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: mlir-opt %s -verify-each
-
-// DPPOp verifier must not assert when src type is a
-// vector (e.g. ARM SME tile vectors).
-
-module {
-  func.func @main() {
-    %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
-    %pop = math.ctpop %tile : vector<[16]x[16]xi8>
-    %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<[16]x[16]xi8>
-    return
-  }
-}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 2b3234ef8510d..ed9bc1b3dbb5d 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -801,3 +801,14 @@ func.func @wmma_scale(%fp8_src: vector<64xf8E4M3FN>, %fp6_alt_src: vector<64xf6E
   %5 = amdgpu.scaled_wmma 32x16x128 (%scale_vec4_e4m3 * %fp4_src_a) * (%scale_vec4_e4m3 * %fp4_src_b) + %dst1 {a_first_scale_lane = 0 : i32, b_first_scale_lane = 0 : i32} : vector<4xf8E4M3FN>, vector<128xf4E2M1FN>, vector<4xf8E4M3FN>, vector<64xf4E2M1FN>, vector<16xf32>
   func.return
 }
+
+// CHECK-LABEL: func.func @dpp_vector_src_does_not_assert
+// CHECK: arm_sme.get_tile
+// CHECK: math.ctpop
+// CHECK: amdgpu.dpp
+func.func @dpp_vector_src_does_not_assert() {
+  %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
+  %pop = math.ctpop %tile : vector<[16]x[16]xi8>
+  %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<[16]x[16]xi8>
+  func.return
+}
\ No newline at end of file

>From ec72c88472dcdd164779f32293471186a18cc794 Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sat, 31 Jan 2026 13:40:12 -0500
Subject: [PATCH 4/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector operand
 types with fixed format, added review changes

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td | 2 +-
 mlir/test/Dialect/AMDGPU/ops.mlir                | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
index 8240f6180b5c3..35a627843de13 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
@@ -36,7 +36,7 @@ def AnyIntegerOrFloatOr1DVector :
 // Types with element width up to 64 bits, used to keep dpp operands legal.
 def AMDGPU_IntOrFloatWidthLeq64 : Type<
   CPred<"([](::mlir::Type t) { return t.isIntOrFloat() && t.getIntOrFloatBitWidth() <= 64; })(::mlir::getElementTypeOrSelf($_self))">,
-  "integer or float with element bitwidth ≤ 64">;
+  "integer or float with element bitwidth <= 64">;
 
 def AMDGPU_IntOrFloatOr1DVectorWidthLeq64 :
   AnyTypeOf<[
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index ed9bc1b3dbb5d..7b2d5ca96e040 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -806,9 +806,7 @@ func.func @wmma_scale(%fp8_src: vector<64xf8E4M3FN>, %fp6_alt_src: vector<64xf6E
 // CHECK: arm_sme.get_tile
 // CHECK: math.ctpop
 // CHECK: amdgpu.dpp
-func.func @dpp_vector_src_does_not_assert() {
-  %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
-  %pop = math.ctpop %tile : vector<[16]x[16]xi8>
+func.func @dpp_vector_src_does_not_assert(%tile: vector<[16]x[16]xi8>,%pop:  vector<[16]x[16]xi8>) {
   %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<[16]x[16]xi8>
   func.return
-}
\ No newline at end of file
+}

>From 27c71d179e591401319d345f4ef83f1b0a0d51a7 Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sat, 31 Jan 2026 13:43:21 -0500
Subject: [PATCH 5/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector operand
 types with fixed format, added review changes

---
 mlir/test/Dialect/AMDGPU/ops.mlir | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 7b2d5ca96e040..72ba71c5d416a 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -803,10 +803,8 @@ func.func @wmma_scale(%fp8_src: vector<64xf8E4M3FN>, %fp6_alt_src: vector<64xf6E
 }
 
 // CHECK-LABEL: func.func @dpp_vector_src_does_not_assert
-// CHECK: arm_sme.get_tile
-// CHECK: math.ctpop
 // CHECK: amdgpu.dpp
-func.func @dpp_vector_src_does_not_assert(%tile: vector<[16]x[16]xi8>,%pop:  vector<[16]x[16]xi8>) {
+func.func @dpp_vector_src_does_not_assert(%tile: vector<[16]x[16]xi8>, %pop: vector<[16]x[16]xi8>) {
   %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<[16]x[16]xi8>
   func.return
 }

>From 470ff9e2686b626362efb83dbbfd559cd43d4592 Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sat, 31 Jan 2026 13:45:40 -0500
Subject: [PATCH 6/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector operand
 types with fixed format, added review changes

---
 mlir/test/Dialect/AMDGPU/ops.mlir | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 72ba71c5d416a..f62578bc3381b 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -808,3 +808,4 @@ func.func @dpp_vector_src_does_not_assert(%tile: vector<[16]x[16]xi8>, %pop: vec
   %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<[16]x[16]xi8>
   func.return
 }
+

>From 781d566a0f3f4f6db09d68f0b847bb2dcefa9e75 Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sun, 1 Feb 2026 03:59:25 -0500
Subject: [PATCH 7/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector and
 prevent scalable types

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td | 2 +-
 mlir/test/Dialect/AMDGPU/invalid.mlir            | 8 ++++++++
 mlir/test/Dialect/AMDGPU/ops.mlir                | 5 ++---
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
index 35a627843de13..c7523c386f36a 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUOps.td
@@ -35,7 +35,7 @@ def AnyIntegerOrFloatOr1DVector :
 
 // Types with element width up to 64 bits, used to keep dpp operands legal.
 def AMDGPU_IntOrFloatWidthLeq64 : Type<
-  CPred<"([](::mlir::Type t) { return t.isIntOrFloat() && t.getIntOrFloatBitWidth() <= 64; })(::mlir::getElementTypeOrSelf($_self))">,
+  CPred<"$_self.isIntOrFloat() && $_self.getIntOrFloatBitWidth() <= 64">,
   "integer or float with element bitwidth <= 64">;
 
 def AMDGPU_IntOrFloatOr1DVectorWidthLeq64 :
diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index 1299f3b14b14f..a763c5daed2e5 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -516,3 +516,11 @@ func.func @sparse_mfma_wrong_dest_count(%a: vector<4xf16>, %b: vector<8xf16>, %c
   %d = amdgpu.sparse_mfma 16x16x32 %a * %b + %c sparse(%idx : vector<4xi8>) : vector<4xf16>, vector<8xf16>, vector<16xf32>
   func.return %d : vector<16xf32>
 }
+
+// -----
+
+func.func @dpp_rejects_scalable(%a: vector<[16]x[16]xi8>, %b: vector<[16]x[16]xi8>) {
+  // expected-error @+1 {{operand #0 must be integer or float with element bitwidth <= 64}}
+  %0 = amdgpu.dpp %a %b row_shl(1 : i32) : vector<[16]x[16]xi8>
+  func.return
+}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index f62578bc3381b..e9c91265a7886 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -804,8 +804,7 @@ func.func @wmma_scale(%fp8_src: vector<64xf8E4M3FN>, %fp6_alt_src: vector<64xf6E
 
 // CHECK-LABEL: func.func @dpp_vector_src_does_not_assert
 // CHECK: amdgpu.dpp
-func.func @dpp_vector_src_does_not_assert(%tile: vector<[16]x[16]xi8>, %pop: vector<[16]x[16]xi8>) {
-  %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<[16]x[16]xi8>
+func.func @dpp_vector_src_does_not_assert(%tile: vector<256xi8>, %pop: vector<256xi8>) {
+  %r = amdgpu.dpp %pop %tile row_shl(1 : i32) : vector<256xi8>
   func.return
 }
-

>From 038880e785a118b1b1b70dbd1465b1a429a6281c Mon Sep 17 00:00:00 2001
From: Ayush3941 <ayushkgaur1 at gmail.com>
Date: Sun, 1 Feb 2026 04:07:24 -0500
Subject: [PATCH 8/9] [mlir][AMDGPU] Fix DPPOp verifier crash on vector and
 prevent scalable types with a test

---
 mlir/test/Dialect/AMDGPU/invalid.mlir | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index a763c5daed2e5..b83f5c9537646 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -520,7 +520,7 @@ func.func @sparse_mfma_wrong_dest_count(%a: vector<4xf16>, %b: vector<8xf16>, %c
 // -----
 
 func.func @dpp_rejects_scalable(%a: vector<[16]x[16]xi8>, %b: vector<[16]x[16]xi8>) {
-  // expected-error @+1 {{operand #0 must be integer or float with element bitwidth <= 64}}
+  // expected-error @+1 {{fixed-length vector of integer or float with element bitwidth <= 64 values of ranks 1}}
   %0 = amdgpu.dpp %a %b row_shl(1 : i32) : vector<[16]x[16]xi8>
   func.return
 }

>From d2420072a771659f8a3ce7e63862b9840c1328b1 Mon Sep 17 00:00:00 2001
From: Ayush Kumar Gaur <132849148+Ayush3941 at users.noreply.github.com>
Date: Sat, 7 Feb 2026 06:33:26 -0500
Subject: [PATCH 9/9] fixed merge

---
 mlir/test/Dialect/AMDGPU/ops.mlir | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index d94aff8e82e51..5011891ed39d2 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -828,4 +828,5 @@ func.func @ds_barrier_ops(%barrier: memref<!amdgpu.ds_barrier_state, #gpu.addres
   %init = amdgpu.ds_barrier_state_init_count %state : !amdgpu.ds_barrier_state -> i32
   // CHECK: [[PARITY:%.*]] = amdgpu.ds_barrier_state_phase_parity [[STATE]] : !amdgpu.ds_barrier_state -> i1
   %parity = amdgpu.ds_barrier_state_phase_parity %state : !amdgpu.ds_barrier_state -> i1
-
+  func.return
+}