[Mlir-commits] [flang] [mlir] [mlir][ODS] Fix notorious double-space bug in op printers (PR #184253)

Mon Mar 2 14:54:50 PST 2026

https://github.com/kuhar created https://github.com/llvm/llvm-project/pull/184253

When an op's assembly format prints an attribute via `printStrippedAttrOrType`, two independent space-emission mechanisms would fire: the op format generator emits a space before each argument, and the attribute's generated `print` method also emits a leading space (`shouldEmitSpace` initialized to true). This caused double spaces like `gpu.shuffle  xor`.

The usual workaround for this was to add double backticks to consume the leading space.

This is fixed by removing the leading space from generated attr/type `print()` methods (initializing `shouldEmitSpace` to false) and compensating in the print dispatcher, which now conditionally emits a space between the mnemonic and the `print` call when the format starts with a name or keyword rather than punctuation.

Also remove some workarounds for the double-spacing bug in op formats and update tests whose expected output no longer has leading spaces.

Assisted-by: claude

>From f3beb74c9afb652bff56a20f9cde24327963749c Mon Sep 17 00:00:00 2001
From: Jakub Kuderski <jakub at nod-labs.com>
Date: Mon, 2 Mar 2026 07:23:50 -0500
Subject: [PATCH] [mlir][ODS] Fix notorious double-space bug in op printers

When an op's assembly format prints an attribute via `printStrippedAttrOrType`,
two independent space-emission mechanisms would fire: the op format generator
emits a space before each argument, and the attribute's generated `print`
method also emits a leading space (`shouldEmitSpace` initialized to true).
This caused double spaces like `gpu.shuffle  xor`.

The usual workaround for this was to add double backticks to consume the
leading space.

This is fixed by removing the leading space from generated attr/type print()
methods (initializing shouldEmitSpace=false) and compensating in the print
dispatcher, which now conditionally emits a space between the mnemonic and
the `print` call when the format starts with a name or keyword rather than
punctuation.

Also remove some workarounds for the double-spacing bug in op formats
and update tests whose expected output no longer has leading spaces.

Assisted-by: claude
---
 flang/test/Lower/location.f90                 |  2 +-
 .../include/mlir/Dialect/Index/IR/IndexOps.td |  2 +-
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td |  2 +-
 mlir/test/CAPI/irdl.c                         |  2 +-
 mlir/test/Dialect/GPU/shuffle-rewrite.mlir    |  8 ++--
 mlir/test/Dialect/GPU/sparse-roundtrip.mlir   |  4 +-
 .../SparseTensor/GPU/gpu_spgemm_lib.mlir      |  8 ++--
 .../Vector/vector-warp-distribute.mlir        | 34 ++++++++--------
 .../XeGPU/sg-to-wi-experimental-unit.mlir     | 40 +++++++++----------
 mlir/test/IR/array-of-attr.mlir               |  2 +-
 .../attr-or-type-format-roundtrip.mlir        |  4 +-
 mlir/test/mlir-tblgen/attr-or-type-format.td  | 35 +++++++++++++---
 mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp   | 39 +++++++++++++++++-
 .../tools/mlir-tblgen/AttrOrTypeFormatGen.cpp | 11 +++--
 14 files changed, 129 insertions(+), 64 deletions(-)

diff --git a/flang/test/Lower/location.f90 b/flang/test/Lower/location.f90
index cdde1cc4cb40a..744ff7e3bf039 100644
--- a/flang/test/Lower/location.f90
+++ b/flang/test/Lower/location.f90
@@ -6,7 +6,7 @@ program test
 end
 
 ! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "TEST"} {
-! CHECK: fir.call @_FortranAioOutputAscii(%{{.*}}, %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<i8>, !fir.ref<i8>, i64) -> i1 loc(fused<#fir<loc_kind_array[ base,  inclusion,  inclusion]>>["{{.*}}location1.inc":1:10, "{{.*}}location0.inc":1:1, "{{.*}}location.f90":4:1])
+! CHECK: fir.call @_FortranAioOutputAscii(%{{.*}}, %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<i8>, !fir.ref<i8>, i64) -> i1 loc(fused<#fir<loc_kind_array[base, inclusion, inclusion]>>["{{.*}}location1.inc":1:10, "{{.*}}location0.inc":1:1, "{{.*}}location.f90":4:1])
 ! CHECK: return loc("{{.*}}location.f90":6:1)
 ! CHECK: } loc("{{.*}}location.f90":3:1)
 
diff --git a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td
index 230a3815bdd81..d97d5be698034 100644
--- a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td
+++ b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td
@@ -560,7 +560,7 @@ def Index_CmpOp : IndexOp<"cmp", [Pure]> {
 
   let arguments = (ins IndexCmpPredicateAttr:$pred, Index:$lhs, Index:$rhs);
   let results = (outs I1:$result);
-  let assemblyFormat = "`` $pred `(` $lhs `,` $rhs `)` attr-dict";
+  let assemblyFormat = "$pred `(` $lhs `,` $rhs `)` attr-dict";
   let hasFolder = 1;
   let hasCanonicalizeMethod = 1;
 }
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 6d21aa9295716..47a1804647fa7 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1490,7 +1490,7 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
   }];
   let arguments = (ins XeGPU_MemorySpaceAttr: $memory_kind,
                        XeGPU_FenceScopeAttr: $fence_scope);
-  let assemblyFormat = [{`memory_kind` `=` `` $memory_kind `,` `fence_scope` `=` `` $fence_scope attr-dict}];
+  let assemblyFormat = [{`memory_kind` `=` $memory_kind `,` `fence_scope` `=` $fence_scope attr-dict}];
   let extraClassDeclaration = extraBaseClassDeclaration;
 }
 
diff --git a/mlir/test/CAPI/irdl.c b/mlir/test/CAPI/irdl.c
index 20cf35f2501ff..4ee4bc0cc35b6 100644
--- a/mlir/test/CAPI/irdl.c
+++ b/mlir/test/CAPI/irdl.c
@@ -62,7 +62,7 @@ void testVariadicityAttributes(MlirContext ctx) {
   MlirAttribute variadicityArray =
       mlirIRDLVariadicityArrayAttrGet(ctx, 3, variadicities);
 
-  // CHECK: #irdl<variadicity_array[ single, optional,  variadic]>
+  // CHECK: #irdl<variadicity_array[single, optional, variadic]>
   mlirAttributeDump(variadicityArray);
 }
 
diff --git a/mlir/test/Dialect/GPU/shuffle-rewrite.mlir b/mlir/test/Dialect/GPU/shuffle-rewrite.mlir
index c0ccae05a0572..5492ebee915cd 100644
--- a/mlir/test/Dialect/GPU/shuffle-rewrite.mlir
+++ b/mlir/test/Dialect/GPU/shuffle-rewrite.mlir
@@ -10,8 +10,8 @@ module {
       // CHECK-NEXT: %[[LO:.*]] = arith.trunci %[[INTVAL]] : i64 to i32
       // CHECK-NEXT: %[[HI64:.*]] = arith.shrui %[[INTVAL]], %[[C32:.*]] : i64
       // CHECK-NEXT: %[[HI:.*]] = arith.trunci %[[HI64]] : i64 to i32
-      // CHECK-NEXT: %[[SH1:.*]], %[[V1:.*]] = gpu.shuffle  xor %[[LO]], %[[OFF]], %[[WIDTH]] : i32
-      // CHECK-NEXT: %[[SH2:.*]], %[[V2:.*]] = gpu.shuffle  xor %[[HI]], %[[OFF]], %[[WIDTH]] : i32
+      // CHECK-NEXT: %[[SH1:.*]], %[[V1:.*]] = gpu.shuffle xor %[[LO]], %[[OFF]], %[[WIDTH]] : i32
+      // CHECK-NEXT: %[[SH2:.*]], %[[V2:.*]] = gpu.shuffle xor %[[HI]], %[[OFF]], %[[WIDTH]] : i32
       // CHECK-NEXT: %[[LOSH:.*]] = arith.extui %[[SH1]] : i32 to i64
       // CHECK-NEXT: %[[HISHTMP:.*]] = arith.extui %[[SH2]] : i32 to i64
       // CHECK-NEXT: %[[HISH:.*]] = arith.shli %[[HISHTMP]], %[[C32]] : i64
@@ -36,8 +36,8 @@ module {
       // CHECK: %[[LO:.*]] = arith.trunci %[[VALUE]] : i64 to i32
       // CHECK-NEXT: %[[HI64:.*]] = arith.shrui %[[VALUE]], %[[C32:.*]] : i64
       // CHECK-NEXT: %[[HI:.*]] = arith.trunci %[[HI64]] : i64 to i32
-      // CHECK-NEXT: %[[SH1:.*]], %[[V1:.*]] = gpu.shuffle  xor %[[LO]], %[[OFF]], %[[WIDTH]] : i32
-      // CHECK-NEXT: %[[SH2:.*]], %[[V2:.*]] = gpu.shuffle  xor %[[HI]], %[[OFF]], %[[WIDTH]] : i32
+      // CHECK-NEXT: %[[SH1:.*]], %[[V1:.*]] = gpu.shuffle xor %[[LO]], %[[OFF]], %[[WIDTH]] : i32
+      // CHECK-NEXT: %[[SH2:.*]], %[[V2:.*]] = gpu.shuffle xor %[[HI]], %[[OFF]], %[[WIDTH]] : i32
       // CHECK-NEXT: %[[LOSH:.*]] = arith.extui %[[SH1]] : i32 to i64
       // CHECK-NEXT: %[[HISHTMP:.*]] = arith.extui %[[SH2]] : i32 to i64
       // CHECK-NEXT: %[[HISH:.*]] = arith.shli %[[HISHTMP]], %[[C32]] : i64
diff --git a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
index 1e74aa3a4813a..b88492433ba02 100644
--- a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
+++ b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
@@ -62,8 +62,8 @@ module attributes {gpu.container_module} {
   // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
   // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
   // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_create_descr async [%{{.*}}]
-  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
-  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
+  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
+  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
   // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spmat_get_size async [%{{.*}}] %{{.*}}
   // CHECK: %{{.*}} = gpu.set_csr_pointers async [%{{.*}}] %{{.*}}, {{.*}}, {{.*}}, {{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
   // CHECK: %{{.*}} = gpu.spgemm_copy async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
index fa8ad1cc50604..9688e886f69ca 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
@@ -49,12 +49,12 @@
 // CHECK:           %[[VAL_53:.*]], %[[VAL_54:.*]] = gpu.alloc async {{\[}}%[[VAL_52]]] (%[[VAL_3]]) : memref<?xf32>
 // CHECK:           %[[VAL_55:.*]], %[[VAL_56:.*]] = gpu.create_csr async {{\[}}%[[VAL_54]]] %[[VAL_2]], %[[VAL_2]], %[[VAL_3]], %[[VAL_49]], %[[VAL_51]], %[[VAL_53]] : memref<?xindex>, memref<?xindex>, memref<?xf32>
 // CHECK:           %[[VAL_57:.*]], %[[VAL_58:.*]] = gpu.spgemm_create_descr async {{\[}}%[[VAL_56]]]
-// CHECK:           %[[VAL_59:.*]], %[[VAL_60:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_58]]]{ WORK_ESTIMATION} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_3]], %[[VAL_53]] : f32 into memref<?xf32>
+// CHECK:           %[[VAL_59:.*]], %[[VAL_60:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_58]]]{WORK_ESTIMATION} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_3]], %[[VAL_53]] : f32 into memref<?xf32>
 // CHECK:           %[[VAL_61:.*]], %[[VAL_62:.*]] = gpu.alloc async {{\[}}%[[VAL_60]]] (%[[VAL_59]]) : memref<?xi8>
-// CHECK:           %[[VAL_63:.*]], %[[VAL_64:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_62]]]{ WORK_ESTIMATION} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_59]], %[[VAL_61]] : f32 into memref<?xi8>
-// CHECK:           %[[VAL_65:.*]], %[[VAL_66:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_64]]]{ COMPUTE} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_3]], %[[VAL_53]] : f32 into memref<?xf32>
+// CHECK:           %[[VAL_63:.*]], %[[VAL_64:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_62]]]{WORK_ESTIMATION} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_59]], %[[VAL_61]] : f32 into memref<?xi8>
+// CHECK:           %[[VAL_65:.*]], %[[VAL_66:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_64]]]{COMPUTE} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_3]], %[[VAL_53]] : f32 into memref<?xf32>
 // CHECK:           %[[VAL_67:.*]], %[[VAL_68:.*]] = gpu.alloc async {{\[}}%[[VAL_66]]] (%[[VAL_65]]) : memref<?xi8>
-// CHECK:           %[[VAL_69:.*]], %[[VAL_70:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_68]]]{ COMPUTE} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_65]], %[[VAL_67]] : f32 into memref<?xi8>
+// CHECK:           %[[VAL_69:.*]], %[[VAL_70:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_68]]]{COMPUTE} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_65]], %[[VAL_67]] : f32 into memref<?xi8>
 // CHECK:           %[[VAL_71:.*]], %[[VAL_72:.*]], %[[VAL_73:.*]], %[[VAL_74:.*]] = gpu.spmat_get_size async {{\[}}%[[VAL_70]]] %[[VAL_55]]
 // CHECK:           %[[VAL_75:.*]], %[[VAL_76:.*]] = gpu.alloc async {{\[}}%[[VAL_74]]] (%[[VAL_73]]) : memref<?xindex>
 // CHECK:           %[[VAL_77:.*]], %[[VAL_78:.*]] = gpu.alloc async {{\[}}%[[VAL_76]]] (%[[VAL_73]]) : memref<?xf32>
diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index 63c9d9b7a9bf8..bea098da13f1e 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -741,15 +741,15 @@ func.func @warp_scf_for_swapped_for_results(%arg0: index) {
 //       CHECK-PROP:     gpu.yield %{{.*}} : vector<32xf32>
 //       CHECK-PROP:   }
 //       CHECK-PROP:   %[[a:.*]] = vector.extract %[[warp_op]][0] : f32 from vector<1xf32>
-//       CHECK-PROP:   %[[r0:.*]], %{{.*}} = gpu.shuffle  xor %[[a]], %[[c1]], %[[c32]]
+//       CHECK-PROP:   %[[r0:.*]], %{{.*}} = gpu.shuffle xor %[[a]], %[[c1]], %[[c32]]
 //       CHECK-PROP:   %[[a0:.*]] = arith.addf %[[a]], %[[r0]]
-//       CHECK-PROP:   %[[r1:.*]], %{{.*}} = gpu.shuffle  xor %[[a0]], %[[c2]], %[[c32]]
+//       CHECK-PROP:   %[[r1:.*]], %{{.*}} = gpu.shuffle xor %[[a0]], %[[c2]], %[[c32]]
 //       CHECK-PROP:   %[[a1:.*]] = arith.addf %[[a0]], %[[r1]]
-//       CHECK-PROP:   %[[r2:.*]], %{{.*}} = gpu.shuffle  xor %[[a1]], %[[c4]], %[[c32]]
+//       CHECK-PROP:   %[[r2:.*]], %{{.*}} = gpu.shuffle xor %[[a1]], %[[c4]], %[[c32]]
 //       CHECK-PROP:   %[[a2:.*]] = arith.addf %[[a1]], %[[r2]]
-//       CHECK-PROP:   %[[r3:.*]], %{{.*}} = gpu.shuffle  xor %[[a2]], %[[c8]], %[[c32]]
+//       CHECK-PROP:   %[[r3:.*]], %{{.*}} = gpu.shuffle xor %[[a2]], %[[c8]], %[[c32]]
 //       CHECK-PROP:   %[[a3:.*]] = arith.addf %[[a2]], %[[r3]]
-//       CHECK-PROP:   %[[r4:.*]], %{{.*}} = gpu.shuffle  xor %[[a3]], %[[c16]], %[[c32]]
+//       CHECK-PROP:   %[[r4:.*]], %{{.*}} = gpu.shuffle xor %[[a3]], %[[c16]], %[[c32]]
 //       CHECK-PROP:   %[[a4:.*]] = arith.addf %[[a3]], %[[r4]]
 //       CHECK-PROP:   return %[[a4]] : f32
 func.func @vector_reduction(%laneid: index) -> (f32) {
@@ -829,15 +829,15 @@ func.func @vector_reduction(%laneid: index, %m0: memref<4x2x32xf32>, %m1: memref
 //       CHECK-PROP:     gpu.yield %{{.*}} : vector<64xf32>
 //       CHECK-PROP:   }
 //       CHECK-PROP:   %[[a:.*]] = vector.reduction <add>, %[[warp_op]] : vector<2xf32> into f32
-//       CHECK-PROP:   %[[r0:.*]], %{{.*}} = gpu.shuffle  xor %[[a]], %[[c1]], %[[c32]]
+//       CHECK-PROP:   %[[r0:.*]], %{{.*}} = gpu.shuffle xor %[[a]], %[[c1]], %[[c32]]
 //       CHECK-PROP:   %[[a0:.*]] = arith.addf %[[a]], %[[r0]]
-//       CHECK-PROP:   %[[r1:.*]], %{{.*}} = gpu.shuffle  xor %[[a0]], %[[c2]], %[[c32]]
+//       CHECK-PROP:   %[[r1:.*]], %{{.*}} = gpu.shuffle xor %[[a0]], %[[c2]], %[[c32]]
 //       CHECK-PROP:   %[[a1:.*]] = arith.addf %[[a0]], %[[r1]]
-//       CHECK-PROP:   %[[r2:.*]], %{{.*}} = gpu.shuffle  xor %[[a1]], %[[c4]], %[[c32]]
+//       CHECK-PROP:   %[[r2:.*]], %{{.*}} = gpu.shuffle xor %[[a1]], %[[c4]], %[[c32]]
 //       CHECK-PROP:   %[[a2:.*]] = arith.addf %[[a1]], %[[r2]]
-//       CHECK-PROP:   %[[r3:.*]], %{{.*}} = gpu.shuffle  xor %[[a2]], %[[c8]], %[[c32]]
+//       CHECK-PROP:   %[[r3:.*]], %{{.*}} = gpu.shuffle xor %[[a2]], %[[c8]], %[[c32]]
 //       CHECK-PROP:   %[[a3:.*]] = arith.addf %[[a2]], %[[r3]]
-//       CHECK-PROP:   %[[r4:.*]], %{{.*}} = gpu.shuffle  xor %[[a3]], %[[c16]], %[[c32]]
+//       CHECK-PROP:   %[[r4:.*]], %{{.*}} = gpu.shuffle xor %[[a3]], %[[c16]], %[[c32]]
 //       CHECK-PROP:   %[[a4:.*]] = arith.addf %[[a3]], %[[r4]]
 //       CHECK-PROP:   return %[[a4]] : f32
 func.func @vector_reduction_large(%laneid: index) -> (f32) {
@@ -863,15 +863,15 @@ func.func @vector_reduction_large(%laneid: index) -> (f32) {
 //       CHECK-PROP:     gpu.yield %{{.*}}, %{{.*}} : vector<64xf32>, f32
 //       CHECK-PROP:   }
 //       CHECK-PROP:   %[[a:.*]] = vector.reduction <add>, %[[warp_op]]#0 : vector<2xf32> into f32
-//       CHECK-PROP:   %[[r0:.*]], %{{.*}} = gpu.shuffle  xor %[[a]], %[[c1]], %[[c32]]
+//       CHECK-PROP:   %[[r0:.*]], %{{.*}} = gpu.shuffle xor %[[a]], %[[c1]], %[[c32]]
 //       CHECK-PROP:   %[[a0:.*]] = arith.addf %[[a]], %[[r0]]
-//       CHECK-PROP:   %[[r1:.*]], %{{.*}} = gpu.shuffle  xor %[[a0]], %[[c2]], %[[c32]]
+//       CHECK-PROP:   %[[r1:.*]], %{{.*}} = gpu.shuffle xor %[[a0]], %[[c2]], %[[c32]]
 //       CHECK-PROP:   %[[a1:.*]] = arith.addf %[[a0]], %[[r1]]
-//       CHECK-PROP:   %[[r2:.*]], %{{.*}} = gpu.shuffle  xor %[[a1]], %[[c4]], %[[c32]]
+//       CHECK-PROP:   %[[r2:.*]], %{{.*}} = gpu.shuffle xor %[[a1]], %[[c4]], %[[c32]]
 //       CHECK-PROP:   %[[a2:.*]] = arith.addf %[[a1]], %[[r2]]
-//       CHECK-PROP:   %[[r3:.*]], %{{.*}} = gpu.shuffle  xor %[[a2]], %[[c8]], %[[c32]]
+//       CHECK-PROP:   %[[r3:.*]], %{{.*}} = gpu.shuffle xor %[[a2]], %[[c8]], %[[c32]]
 //       CHECK-PROP:   %[[a3:.*]] = arith.addf %[[a2]], %[[r3]]
-//       CHECK-PROP:   %[[r4:.*]], %{{.*}} = gpu.shuffle  xor %[[a3]], %[[c16]], %[[c32]]
+//       CHECK-PROP:   %[[r4:.*]], %{{.*}} = gpu.shuffle xor %[[a3]], %[[c16]], %[[c32]]
 //       CHECK-PROP:   %[[a4:.*]] = arith.addf %[[a3]], %[[r4]]
 //       CHECK-PROP:   %[[a5:.*]] = arith.addf %[[a4]], %[[warp_op]]#1
 //       CHECK-PROP:   return %[[a5]] : f32
@@ -926,7 +926,7 @@ func.func @warp_constant(%laneid: index) -> (vector<1xf32>) {
 //       CHECK-PROP:     gpu.yield %[[V]] : vector<64xf32>
 //       CHECK-PROP:   }
 //       CHECK-PROP:   %[[E:.*]] = vector.extract %[[R]][1] : f32 from vector<2xf32>
-//       CHECK-PROP:   %[[SHUFFLED:.*]], %{{.*}} = gpu.shuffle  idx %[[E]], %[[C5_I32]]
+//       CHECK-PROP:   %[[SHUFFLED:.*]], %{{.*}} = gpu.shuffle idx %[[E]], %[[C5_I32]]
 //       CHECK-PROP:   return %[[SHUFFLED]] : f32
 func.func @vector_extract_1d(%laneid: index) -> (f32) {
   %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (f32) {
@@ -1061,7 +1061,7 @@ func.func @vector_extract_1element(%laneid: index) -> (f32) {
 //       CHECK-PROP:   %[[DISTR_POS:.*]] = affine.apply #[[$map1]]()[%[[POS]]]
 //       CHECK-PROP:   %[[EXTRACTED:.*]] = vector.extract %[[W]][%[[DISTR_POS]]] : f32 from vector<3xf32>
 //       CHECK-PROP:   %[[FROM_LANE_I32:.*]] = arith.index_cast %[[FROM_LANE]] : index to i32
-//       CHECK-PROP:   %[[SHUFFLED:.*]], %{{.*}} = gpu.shuffle  idx %[[EXTRACTED]], %[[FROM_LANE_I32]], %[[C32]] : f32
+//       CHECK-PROP:   %[[SHUFFLED:.*]], %{{.*}} = gpu.shuffle idx %[[EXTRACTED]], %[[FROM_LANE_I32]], %[[C32]] : f32
 //       CHECK-PROP:   return %[[SHUFFLED]]
 func.func @vector_extract_1d(%laneid: index, %pos: index) -> (f32) {
   %r = gpu.warp_execute_on_lane_0(%laneid)[32] -> (f32) {
diff --git a/mlir/test/Dialect/XeGPU/sg-to-wi-experimental-unit.mlir b/mlir/test/Dialect/XeGPU/sg-to-wi-experimental-unit.mlir
index bc36554f5c266..5709ddc6c3403 100644
--- a/mlir/test/Dialect/XeGPU/sg-to-wi-experimental-unit.mlir
+++ b/mlir/test/Dialect/XeGPU/sg-to-wi-experimental-unit.mlir
@@ -267,19 +267,19 @@ gpu.func @scatter_ops_with_leading_dims(%src: memref<256xf16>) {
 // CHECK:     %[[LANE_RED:.*]] = vector.reduction <add>, %[[CAST:.*]] : vector<2xf32> into f32
 // CHECK-DAG: %[[C16_1:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32
-// CHECK:     %[[SHUFFLE1:.*]], %{{.*}} = gpu.shuffle  xor %[[LANE_RED]], %[[C1]], %[[C16_1]] : f32
+// CHECK:     %[[SHUFFLE1:.*]], %{{.*}} = gpu.shuffle xor %[[LANE_RED]], %[[C1]], %[[C16_1]] : f32
 // CHECK:     %[[ADD1:.*]] = arith.addf %[[LANE_RED]], %[[SHUFFLE1]] : f32
 // CHECK-DAG: %[[C16_2:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32
-// CHECK:     %[[SHUFFLE2:.*]], %{{.*}} = gpu.shuffle  xor %[[ADD1]], %[[C2]], %[[C16_2]] : f32
+// CHECK:     %[[SHUFFLE2:.*]], %{{.*}} = gpu.shuffle xor %[[ADD1]], %[[C2]], %[[C16_2]] : f32
 // CHECK:     %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[SHUFFLE2]] : f32
 // CHECK-DAG: %[[C16_3:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : i32
-// CHECK:     %[[SHUFFLE3:.*]], %{{.*}} = gpu.shuffle  xor %[[ADD2]], %[[C4]], %[[C16_3]] : f32
+// CHECK:     %[[SHUFFLE3:.*]], %{{.*}} = gpu.shuffle xor %[[ADD2]], %[[C4]], %[[C16_3]] : f32
 // CHECK:     %[[ADD3:.*]] = arith.addf %[[ADD2]], %[[SHUFFLE3]] : f32
 // CHECK-DAG: %[[C16_4:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : i32
-// CHECK:     %[[SHUFFLE4:.*]], %{{.*}} = gpu.shuffle  xor %[[ADD3]], %[[C8]], %[[C16_4]] : f32
+// CHECK:     %[[SHUFFLE4:.*]], %{{.*}} = gpu.shuffle xor %[[ADD3]], %[[C8]], %[[C16_4]] : f32
 // CHECK:     %[[ADD4:.*]] = arith.addf %[[ADD3]], %[[SHUFFLE4]] : f32
 // CHECK:     %[[FINAL:.*]] = arith.addf %[[ADD4]], %[[CST]] : f32
 gpu.func @vector_reduction() {
@@ -300,19 +300,19 @@ gpu.func @vector_reduction() {
 // CHECK: %[[V3:.*]] = vector.reduction <add>, %[[V1]] : vector<1xf32> into f32
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32
-// CHECK: %[[SHUFFLE1:.*]], %{{.*}} = gpu.shuffle  xor %[[V3]], %[[C1]], %[[C16]] : f32
+// CHECK: %[[SHUFFLE1:.*]], %{{.*}} = gpu.shuffle xor %[[V3]], %[[C1]], %[[C16]] : f32
 // CHECK: %[[V4:.*]] = arith.addf %[[V3]], %[[SHUFFLE1]] : f32
 // CHECK-DAG: %[[C16_2:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32
-// CHECK: %[[SHUFFLE2:.*]], %{{.*}} = gpu.shuffle  xor %[[V4]], %[[C2]], %[[C16_2]] : f32
+// CHECK: %[[SHUFFLE2:.*]], %{{.*}} = gpu.shuffle xor %[[V4]], %[[C2]], %[[C16_2]] : f32
 // CHECK: %[[V5:.*]] = arith.addf %[[V4]], %[[SHUFFLE2]] : f32
 // CHECK-DAG: %[[C16_3:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : i32
-// CHECK: %[[SHUFFLE3:.*]], %{{.*}} = gpu.shuffle  xor %[[V5]], %[[C4]], %[[C16_3]] : f32
+// CHECK: %[[SHUFFLE3:.*]], %{{.*}} = gpu.shuffle xor %[[V5]], %[[C4]], %[[C16_3]] : f32
 // CHECK: %[[V6:.*]] = arith.addf %[[V5]], %[[SHUFFLE3]] : f32
 // CHECK-DAG: %[[C16_4:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : i32
-// CHECK: %[[SHUFFLE4:.*]], %{{.*}} = gpu.shuffle  xor %[[V6]], %[[C8]], %[[C16_4]] : f32
+// CHECK: %[[SHUFFLE4:.*]], %{{.*}} = gpu.shuffle xor %[[V6]], %[[C8]], %[[C16_4]] : f32
 // CHECK: %[[V7:.*]] = arith.addf %[[V6]], %[[SHUFFLE4]] : f32
 // CHECK: %[[V8:.*]] = arith.addf %[[V7]], %[[V2]] : f32
 // CHECK: %[[V9:.*]] = vector.insert %[[V8]], %[[CST_1]] [0] : f32 into vector<2xf32>
@@ -322,19 +322,19 @@ gpu.func @vector_reduction() {
 // CHECK: %[[V13:.*]] = vector.reduction <add>, %[[V11]] : vector<1xf32> into f32
 // CHECK-DAG: %[[C16_5:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : i32
-// CHECK: %[[SHUFFLE5:.*]], %{{.*}} = gpu.shuffle  xor %[[V13]], %[[C1_2]], %[[C16_5]] : f32
+// CHECK: %[[SHUFFLE5:.*]], %{{.*}} = gpu.shuffle xor %[[V13]], %[[C1_2]], %[[C16_5]] : f32
 // CHECK: %[[V14:.*]] = arith.addf %[[V13]], %[[SHUFFLE5]] : f32
 // CHECK-DAG: %[[C16_6:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C2_2:.*]] = arith.constant 2 : i32
-// CHECK: %[[SHUFFLE6:.*]], %{{.*}} = gpu.shuffle  xor %[[V14]], %[[C2_2]], %[[C16_6]] : f32
+// CHECK: %[[SHUFFLE6:.*]], %{{.*}} = gpu.shuffle xor %[[V14]], %[[C2_2]], %[[C16_6]] : f32
 // CHECK: %[[V15:.*]] = arith.addf %[[V14]], %[[SHUFFLE6]] : f32
 // CHECK-DAG: %[[C16_7:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C4_2:.*]] = arith.constant 4 : i32
-// CHECK: %[[SHUFFLE7:.*]], %{{.*}} = gpu.shuffle  xor %[[V15]], %[[C4_2]], %[[C16_7]] : f32
+// CHECK: %[[SHUFFLE7:.*]], %{{.*}} = gpu.shuffle xor %[[V15]], %[[C4_2]], %[[C16_7]] : f32
 // CHECK: %[[V16:.*]] = arith.addf %[[V15]], %[[SHUFFLE7]] : f32
 // CHECK-DAG: %[[C16_8:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C8_2:.*]] = arith.constant 8 : i32
-// CHECK: %[[SHUFFLE8:.*]], %{{.*}} = gpu.shuffle  xor %[[V16]], %[[C8_2]], %[[C16_8]] : f32
+// CHECK: %[[SHUFFLE8:.*]], %{{.*}} = gpu.shuffle xor %[[V16]], %[[C8_2]], %[[C16_8]] : f32
 // CHECK: %[[V17:.*]] = arith.addf %[[V16]], %[[SHUFFLE8]] : f32
 // CHECK: %[[V18:.*]] = arith.addf %[[V17]], %[[V12]] : f32
 // CHECK: %[[V19:.*]] = vector.insert %[[V18]], %[[V9]] [1] : f32 into vector<2xf32>
@@ -365,19 +365,19 @@ gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction(%laneid: index)
 // CHECK: %[[V3:.*]] = vector.reduction <add>, %[[V1]] : vector<1xf32> into f32
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32
-// CHECK: %[[SHUFFLE1:.*]], %{{.*}} = gpu.shuffle  xor %[[V3]], %[[C1]], %[[C16]] : f32
+// CHECK: %[[SHUFFLE1:.*]], %{{.*}} = gpu.shuffle xor %[[V3]], %[[C1]], %[[C16]] : f32
 // CHECK: %[[V4:.*]] = arith.addf %[[V3]], %[[SHUFFLE1:.*]] : f32
 // CHECK-DAG: %[[C16_2:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32
-// CHECK: %[[SHUFFLE2:.*]], %{{.*}} = gpu.shuffle  xor %[[V4]], %[[C2]], %[[C16_2]] : f32
+// CHECK: %[[SHUFFLE2:.*]], %{{.*}} = gpu.shuffle xor %[[V4]], %[[C2]], %[[C16_2]] : f32
 // CHECK: %[[V5:.*]] = arith.addf %[[V4]], %[[SHUFFLE2]] : f32
 // CHECK-DAG: %[[C16_3:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : i32
-// CHECK: %[[SHUFFLE3:.*]], %{{.*}} = gpu.shuffle  xor %[[V5]], %[[C4]], %[[C16_3]] : f32
+// CHECK: %[[SHUFFLE3:.*]], %{{.*}} = gpu.shuffle xor %[[V5]], %[[C4]], %[[C16_3]] : f32
 // CHECK: %[[V6:.*]] = arith.addf %[[V5]], %[[SHUFFLE3]] : f32
 // CHECK-DAG: %[[C16_4:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : i32
-// CHECK: %[[SHUFFLE4:.*]], %{{.*}} = gpu.shuffle  xor %[[V6]], %[[C8]], %[[C16_4]] : f32
+// CHECK: %[[SHUFFLE4:.*]], %{{.*}} = gpu.shuffle xor %[[V6]], %[[C8]], %[[C16_4]] : f32
 // CHECK: %[[V7:.*]] = arith.addf %[[V6]], %[[SHUFFLE4]] : f32
 // CHECK: %[[V8:.*]] = arith.addf %[[V7]], %[[V2]] : f32
 // CHECK: %[[V9:.*]] = vector.insert %[[V8]], %[[CST_1]] [0] : f32 into vector<2xf32>
@@ -387,19 +387,19 @@ gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction(%laneid: index)
 // CHECK: %[[V13:.*]] = vector.reduction <add>, %[[V11]] : vector<1xf32> into f32
 // CHECK-DAG: %[[C16_5:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : i32
-// CHECK: %[[SHUFFLE5:.*]], %{{.*}} = gpu.shuffle  xor %[[V13]], %[[C1_2]], %[[C16_5]] : f32
+// CHECK: %[[SHUFFLE5:.*]], %{{.*}} = gpu.shuffle xor %[[V13]], %[[C1_2]], %[[C16_5]] : f32
 // CHECK: %[[V14:.*]] = arith.addf %[[V13]], %[[SHUFFLE5]] : f32
 // CHECK-DAG: %[[C16_6:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C2_2:.*]] = arith.constant 2 : i32
-// CHECK: %[[SHUFFLE6:.*]], %{{.*}} = gpu.shuffle  xor %[[V14]], %[[C2_2]], %[[C16_6]] : f32
+// CHECK: %[[SHUFFLE6:.*]], %{{.*}} = gpu.shuffle xor %[[V14]], %[[C2_2]], %[[C16_6]] : f32
 // CHECK: %[[V15:.*]] = arith.addf %[[V14]], %[[SHUFFLE6]] : f32
 // CHECK-DAG: %[[C16_7:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C4_2:.*]] = arith.constant 4 : i32
-// CHECK: %[[SHUFFLE7:.*]], %{{.*}} = gpu.shuffle  xor %[[V15]], %[[C4_2]], %[[C16_7]] : f32
+// CHECK: %[[SHUFFLE7:.*]], %{{.*}} = gpu.shuffle xor %[[V15]], %[[C4_2]], %[[C16_7]] : f32
 // CHECK: %[[V16:.*]] = arith.addf %[[V15]], %[[SHUFFLE7]] : f32
 // CHECK-DAG: %[[C16_8:.*]] = arith.constant 16 : i32
 // CHECK-DAG: %[[C8_2:.*]] = arith.constant 8 : i32
-// CHECK: %[[SHUFFLE8:.*]], %{{.*}} = gpu.shuffle  xor %[[V16]], %[[C8_2]], %[[C16_8]] : f32
+// CHECK: %[[SHUFFLE8:.*]], %{{.*}} = gpu.shuffle xor %[[V16]], %[[C8_2]], %[[C16_8]] : f32
 // CHECK: %[[V17:.*]] = arith.addf %[[V16]], %[[SHUFFLE8]] : f32
 // CHECK: %[[V18:.*]] = arith.addf %[[V17]], %[[V12]] : f32
 // CHECK: %[[V19:.*]] = vector.insert %[[V18]], %[[V9]] [1] : f32 into vector<2xf32>
diff --git a/mlir/test/IR/array-of-attr.mlir b/mlir/test/IR/array-of-attr.mlir
index ad046eb8d1a07..2c7b5009f74a2 100644
--- a/mlir/test/IR/array-of-attr.mlir
+++ b/mlir/test/IR/array-of-attr.mlir
@@ -2,7 +2,7 @@
 
 // CHECK: test.array_of_attr_op
 test.array_of_attr_op
-    // CHECK-SAME: a = [ begin 0 : index end, begin 2 : index end ]
+    // CHECK-SAME: a = [begin 0 : index end, begin 2 : index end ]
     a = [begin 0 : index end, begin 2 : index end],
     // CHECK-SAME: [0, 1, -42, 42]
     b = [0, 1, -42, 42],
diff --git a/mlir/test/mlir-tblgen/attr-or-type-format-roundtrip.mlir b/mlir/test/mlir-tblgen/attr-or-type-format-roundtrip.mlir
index 35554b20fbece..9bb333f88dab1 100644
--- a/mlir/test/mlir-tblgen/attr-or-type-format-roundtrip.mlir
+++ b/mlir/test/mlir-tblgen/attr-or-type-format-roundtrip.mlir
@@ -5,9 +5,9 @@
 // CHECK: !test.type_with_format<2147, three = "hi", two = "hi">
 func.func private @test_roundtrip_parameter_parsers(!test.type_with_format<111, three = #test<attr_ugly begin 5 : index end>, two = "foo">) -> !test.type_with_format<2147, two = "hi", three = "hi">
 attributes {
-  // CHECK: #test.attr_with_format<3 : two = "hello", four = [1, 2, 3] : 42 : i64 : 0 : [4, 5, 6], [ 10 : i16]
+  // CHECK: #test.attr_with_format<3 : two = "hello", four = [1, 2, 3] : 42 : i64 : 0 : [4, 5, 6], [10 : i16]
   attr0 = #test.attr_with_format<3 : two = "hello", four = [1, 2, 3] : 42 : i64 : 0 : [4, 5, 6], [10 : i16]>,
-  // CHECK: #test.attr_with_format<5 : two = "a_string", four = [4, 5, 6, 7, 8] : 8 : i8 : 255 : [9, 10, 11], [ 10 : i16]>,
+  // CHECK: #test.attr_with_format<5 : two = "a_string", four = [4, 5, 6, 7, 8] : 8 : i8 : 255 : [9, 10, 11], [10 : i16]>,
   attr1 = #test.attr_with_format<5 : two = "a_string", four = [4, 5, 6, 7, 8] : 8 : i8 : 255 : [9, 10, 11], [10 : i16]>,
   // CHECK: #test<attr_ugly begin 5 : index end>
   attr2 = #test<attr_ugly begin 5 : index end>,
diff --git a/mlir/test/mlir-tblgen/attr-or-type-format.td b/mlir/test/mlir-tblgen/attr-or-type-format.td
index 70c335f2f826c..0a28b303bd220 100644
--- a/mlir/test/mlir-tblgen/attr-or-type-format.td
+++ b/mlir/test/mlir-tblgen/attr-or-type-format.td
@@ -2,6 +2,32 @@
 // RUN: sed 's/DEFAULT_TYPE_PARSER/0/' %s | mlir-tblgen -gen-typedef-defs -typedefs-dialect=TestDialect -I %S/../../include | FileCheck %s --check-prefix=TYPE
 // RUN: sed 's/DEFAULT_TYPE_PARSER/1/' %s | mlir-tblgen -gen-typedef-defs -typedefs-dialect=TestDialect -I %S/../../include | FileCheck %s --check-prefix=TYPE --check-prefix=DEFAULT_TYPE_PARSER
 
+/// Verify the generated printer dispatcher conditionally emits a space between
+/// the mnemonic and t.print(printer). A space is needed when the format starts
+/// with a non-punctuation element; no space when it starts with `<`, `{`, etc.
+
+// Format starts with `$value` (variable): dispatcher adds a space.
+// ATTR-LABEL: .Case<::test::TestEnumBAttr>([&](auto t)
+// ATTR-NEXT:    printer << ::test::TestEnumBAttr::getMnemonic();
+// ATTR-NEXT:    printer << ' ';
+// ATTR-NEXT:    t.print(printer);
+
+// Format starts with `{` (punctuation literal): dispatcher does not add a space.
+// ATTR-LABEL: .Case<::test::TestTAttr>([&](auto t)
+// ATTR-NEXT:    printer << ::test::TestTAttr::getMnemonic();
+// ATTR-NEXT:    t.print(printer);
+
+// Format starts with `{` (punctuation literal): dispatcher does not add a space.
+// TYPE-LABEL: .Case<::test::TestEType>([&](auto t)
+// TYPE-NEXT:    printer << ::test::TestEType::getMnemonic();
+// TYPE-NEXT:    t.print(printer);
+
+// Format starts with `$a` (variable): dispatcher adds a space.
+// TYPE-LABEL: .Case<::test::TestSType>([&](auto t)
+// TYPE-NEXT:    printer << ::test::TestSType::getMnemonic();
+// TYPE-NEXT:    printer << ' ';
+// TYPE-NEXT:    t.print(printer);
+
 include "mlir/IR/AttrTypeBase.td"
 include "mlir/IR/BuiltinAttributes.td"
 include "mlir/IR/EnumAttr.td"
@@ -64,7 +90,7 @@ def TypeParamB : TypeParameter<"TestParamD", "a type param D"> {
 // ATTR: }
 
 // ATTR: void TestAAttr::print(::mlir::AsmPrinter &odsPrinter) const {
-// ATTR:   odsPrinter << ' ' << "hello";
+// ATTR:   odsPrinter << "hello";
 // ATTR:   odsPrinter << ' ' << "=";
 // ATTR:   odsPrinter << ' ';
 // ATTR:   odsPrinter.printStrippedAttrOrType(getValue());
@@ -273,7 +299,7 @@ def EnumAttrB : EnumAttr<Test_Dialect, TestEnumB, "EnumAttrB"> {
 // TYPE:  }
 
 // TYPE: void TestCType::print(::mlir::AsmPrinter &odsPrinter) const {
-// TYPE:   odsPrinter << ' ' << "foo";
+// TYPE:   odsPrinter << "foo";
 // TYPE:   odsPrinter << ",";
 // TYPE:   odsPrinter << ' ' << ":";
 // TYPE:   odsPrinter << ' ' << "bob";
@@ -462,7 +488,6 @@ def TypeC : TestType<"TestE"> {
 
 // TYPE: void TestFType::print(::mlir::AsmPrinter &odsPrinter) const {
 // TYPE if (getA()) {
-// TYPE   odsPrinter << ' ';
 // TYPE   odsPrinter.printStrippedAttrOrType(getA());
 def TypeD : TestType<"TestF"> {
   let parameters = (ins OptionalParameter<"int">:$a);
@@ -546,7 +571,7 @@ def TypeG : TestType<"TestI"> {
 // TYPE:       odsPrinter.printStrippedAttrOrType(getB());
 // TYPE:     odsPrinter << ")";
 // TYPE:   } else {
-// TYPE:     odsPrinter << ' ' << "x";
+// TYPE:     odsPrinter << "x";
 // TYPE:   }
 // TYPE:   odsPrinter.printStrippedAttrOrType(getA());
 
@@ -631,7 +656,7 @@ def TypeL : TestType<"TestN"> {
 
 // TYPE-LABEL: void TestOType::print
 // TYPE: if (!(!(getA() == int())))
-// TYPE: odsPrinter << ' ' << "?"
+// TYPE: odsPrinter << "?"
 // TYPE: else
 // TYPE: odsPrinter.printStrippedAttrOrType(getA())
 
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
index 031e03071842f..9d04264dfc2ff 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
@@ -1099,9 +1099,44 @@ void DefGenerator::emitParsePrintDispatch(ArrayRef<AttrOrTypeDef> defs) {
     parse.body() << llvm::formatv(getValueForMnemonic, defClass, parseOrGet);
 
     // If the def has no parameters and no printer, just print the mnemonic.
-    StringRef printDef = "";
-    if (hasParserPrinterDecl)
+    std::string printDef;
+    if (def.hasCustomAssemblyFormat()) {
+      // Custom format: the user's print() controls its own spacing.
       printDef = "\nt.print(printer);";
+    } else if (auto fmt = def.getAssemblyFormat()) {
+      // Declarative format: since print() no longer emits a leading space,
+      // add one here unless the format starts with punctuation or a
+      // space-eraser directive that should attach directly to the mnemonic.
+      //
+      // '(' starts an optional group, not a literal paren. Treating it as
+      // "no space needed" is correct when the first printed element inside
+      // the group is punctuation (the only in-tree case today, e.g.
+      // TBAARootAttr: `(`<` struct(params)^ `>`)?`). If a future attr/type
+      // has an optional group whose first element is a variable or keyword,
+      // this heuristic will incorrectly suppress the space.
+      // TODO: Query the parsed DefFormat structure instead of inspecting the
+      // raw format string to handle this case properly.
+      //
+      // Note: bare '<', '{', '[' cannot appear at position 0 of a valid
+      // format string (the format lexer rejects them); they must be
+      // backtick-quoted (e.g. `<`), which is handled by the '`' case below.
+      StringRef fmtStr = fmt->trim();
+      bool needsSpace = !fmtStr.empty();
+      if (needsSpace) {
+        char first = fmtStr[0];
+        // '(' starts an optional group (see the caveat and TODO above).
+        if (first == '(')
+          needsSpace = false;
+        // Backtick-quoted `<`, `{`, `(`, `[`, or the space-eraser (``).
+        else if (first == '`' && fmtStr.size() >= 2 &&
+                 StringRef("<{([`").contains(fmtStr[1]))
+          needsSpace = false;
+      }
+      if (needsSpace)
+        printDef = "\nprinter << ' ';\nt.print(printer);";
+      else
+        printDef = "\nt.print(printer);";
+    }
     printer.body() << llvm::formatv(printValue, defClass, printDef);
   }
   parse.body() << "    .Default([&](llvm::StringRef keyword, llvm::SMLoc) {\n"
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index 348026eb99b00..9c12cf59ed083 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -751,9 +751,14 @@ void DefFormat::genPrinter(MethodBody &os) {
   os.indent();
   os << "::mlir::Builder odsBuilder(getContext());\n";
 
-  // Generate printers.
-  shouldEmitSpace = true;
-  lastWasPunctuation = false;
+  // Start with no leading space: the generated dispatcher
+  // (generatedAttributePrinter/generatedTypePrinter in AttrOrTypeDefGen.cpp)
+  // is responsible for emitting any space between the mnemonic and the first
+  // printed element. Set lastWasPunctuation = true so that the
+  // `!lastWasPunctuation` term in genVariablePrinter/genCustomPrinter also
+  // evaluates to false, fully suppressing any leading space from those paths.
+  shouldEmitSpace = false;
+  lastWasPunctuation = true;
   for (FormatElement *el : elements)
     genElementPrinter(el, ctx, os);
 }