[Mlir-commits] [mlir] [mlir] Switch to new LDBG macro (PR #150616)

Jacques Pienaar llvmlistbot at llvm.org
Fri Jul 25 08:58:53 PDT 2025


https://github.com/jpienaar updated https://github.com/llvm/llvm-project/pull/150616

>From 52cea49be6f8bd14c22c693b2d05fdb3a4a0b53b Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jacques+gh at japienaar.info>
Date: Fri, 25 Jul 2025 12:48:31 +0000
Subject: [PATCH 1/2] [mlir] Switch to new LDBG macro

Change the local variants to use the new central LDBG macro.
---
 .../Analysis/DataFlow/DeadCodeAnalysis.cpp    | 130 +++++++++---------
 .../Analysis/DataFlow/LivenessAnalysis.cpp    |  77 +++++------
 .../IR/BufferizableOpInterface.cpp            |   3 -
 .../Transforms/OptimizeAllocationLiveness.cpp |  16 +--
 .../GPU/TransformOps/GPUTransformOps.cpp      |  43 +++---
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   |  24 +---
 .../Linalg/TransformOps/GPUHeuristics.cpp     |  55 ++++----
 .../TransformOps/LinalgTransformOps.cpp       |  27 ++--
 .../Linalg/Transforms/Vectorization.cpp       | 122 ++++++++--------
 .../NVGPU/TransformOps/NVGPUTransformOps.cpp  |   3 -
 .../Dialect/SCF/Transforms/LoopPipelining.cpp |  33 +++--
 mlir/lib/Dialect/SCF/Utils/Utils.cpp          |  15 +-
 .../Interfaces/TransformInterfaces.cpp        |  88 ++++++------
 .../Transforms/VectorEmulateNarrowType.cpp    |  11 +-
 .../Vector/Transforms/VectorUnroll.cpp        |  25 ++--
 mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp |  15 +-
 .../XeGPU/Transforms/XeGPUBlocking.cpp        |   9 +-
 .../Dialect/XeGPU/Transforms/XeGPUUnroll.cpp  |   9 +-
 mlir/lib/Transforms/RemoveDeadValues.cpp      |  54 ++++----
 .../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp |   2 -
 20 files changed, 351 insertions(+), 410 deletions(-)

diff --git a/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp
index 1abdfcbf3496f..10874fd0feb5a 100644
--- a/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp
+++ b/mlir/lib/Analysis/DataFlow/DeadCodeAnalysis.cpp
@@ -23,12 +23,11 @@
 #include "mlir/Support/LLVM.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include <cassert>
 #include <optional>
 
 #define DEBUG_TYPE "dead-code-analysis"
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 using namespace mlir;
 using namespace mlir::dataflow;
@@ -127,7 +126,8 @@ DeadCodeAnalysis::DeadCodeAnalysis(DataFlowSolver &solver)
 }
 
 LogicalResult DeadCodeAnalysis::initialize(Operation *top) {
-  LDBG("Initializing DeadCodeAnalysis for top-level op: " << top->getName());
+  LDBG() << "Initializing DeadCodeAnalysis for top-level op: "
+         << top->getName();
   // Mark the top-level blocks as executable.
   for (Region &region : top->getRegions()) {
     if (region.empty())
@@ -135,7 +135,7 @@ LogicalResult DeadCodeAnalysis::initialize(Operation *top) {
     auto *state =
         getOrCreate<Executable>(getProgramPointBefore(&region.front()));
     propagateIfChanged(state, state->setToLive());
-    LDBG("Marked entry block live for region in op: " << top->getName());
+    LDBG() << "Marked entry block live for region in op: " << top->getName();
   }
 
   // Mark as overdefined the predecessors of symbol callables with potentially
@@ -146,18 +146,18 @@ LogicalResult DeadCodeAnalysis::initialize(Operation *top) {
 }
 
 void DeadCodeAnalysis::initializeSymbolCallables(Operation *top) {
-  LDBG("[init] Entering initializeSymbolCallables for top-level op: "
-       << top->getName());
+  LDBG() << "[init] Entering initializeSymbolCallables for top-level op: "
+         << top->getName();
   analysisScope = top;
   auto walkFn = [&](Operation *symTable, bool allUsesVisible) {
-    LDBG("[init] Processing symbol table op: " << symTable->getName());
+    LDBG() << "[init] Processing symbol table op: " << symTable->getName();
     Region &symbolTableRegion = symTable->getRegion(0);
     Block *symbolTableBlock = &symbolTableRegion.front();
 
     bool foundSymbolCallable = false;
     for (auto callable : symbolTableBlock->getOps<CallableOpInterface>()) {
-      LDBG("[init] Found CallableOpInterface: "
-           << callable.getOperation()->getName());
+      LDBG() << "[init] Found CallableOpInterface: "
+             << callable.getOperation()->getName();
       Region *callableRegion = callable.getCallableRegion();
       if (!callableRegion)
         continue;
@@ -171,8 +171,8 @@ void DeadCodeAnalysis::initializeSymbolCallables(Operation *top) {
         auto *state =
             getOrCreate<PredecessorState>(getProgramPointAfter(callable));
         propagateIfChanged(state, state->setHasUnknownPredecessors());
-        LDBG("[init] Marked callable as having unknown predecessors: "
-             << callable.getOperation()->getName());
+        LDBG() << "[init] Marked callable as having unknown predecessors: "
+               << callable.getOperation()->getName();
       }
       foundSymbolCallable = true;
     }
@@ -187,15 +187,15 @@ void DeadCodeAnalysis::initializeSymbolCallables(Operation *top) {
     if (!uses) {
       // If we couldn't gather the symbol uses, conservatively assume that
       // we can't track information for any nested symbols.
-      LDBG("[init] Could not gather symbol uses, conservatively marking "
-           "all nested callables as having unknown predecessors");
+      LDBG() << "[init] Could not gather symbol uses, conservatively marking "
+                "all nested callables as having unknown predecessors";
       return top->walk([&](CallableOpInterface callable) {
         auto *state =
             getOrCreate<PredecessorState>(getProgramPointAfter(callable));
         propagateIfChanged(state, state->setHasUnknownPredecessors());
-        LDBG("[init] Marked nested callable as "
-             "having unknown predecessors: "
-             << callable.getOperation()->getName());
+        LDBG() << "[init] Marked nested callable as "
+                  "having unknown predecessors: "
+               << callable.getOperation()->getName();
       });
     }
 
@@ -209,15 +209,15 @@ void DeadCodeAnalysis::initializeSymbolCallables(Operation *top) {
         continue;
       auto *state = getOrCreate<PredecessorState>(getProgramPointAfter(symbol));
       propagateIfChanged(state, state->setHasUnknownPredecessors());
-      LDBG("[init] Found non-call use for symbol, "
-           "marked as having unknown predecessors: "
-           << symbol->getName());
+      LDBG() << "[init] Found non-call use for symbol, "
+                "marked as having unknown predecessors: "
+             << symbol->getName();
     }
   };
   SymbolTable::walkSymbolTables(top, /*allSymUsesVisible=*/!top->getBlock(),
                                 walkFn);
-  LDBG("[init] Finished initializeSymbolCallables for top-level op: "
-       << top->getName());
+  LDBG() << "[init] Finished initializeSymbolCallables for top-level op: "
+         << top->getName();
 }
 
 /// Returns true if the operation is a returning terminator in region
@@ -229,14 +229,14 @@ static bool isRegionOrCallableReturn(Operation *op) {
 }
 
 LogicalResult DeadCodeAnalysis::initializeRecursively(Operation *op) {
-  LDBG("[init] Entering initializeRecursively for op: " << op->getName()
-                                                        << " at " << op);
+  LDBG() << "[init] Entering initializeRecursively for op: " << op->getName()
+         << " at " << op;
   // Initialize the analysis by visiting every op with control-flow semantics.
   if (op->getNumRegions() || op->getNumSuccessors() ||
       isRegionOrCallableReturn(op) || isa<CallOpInterface>(op)) {
-    LDBG("[init] Visiting op with control-flow semantics: " << *op);
-    // When the liveness of the parent block changes, make sure to re-invoke the
-    // analysis on the op.
+    LDBG() << "[init] Visiting op with control-flow semantics: " << *op;
+    // When the liveness of the parent block changes, make sure to
+    // re-invoke the analysis on the op.
     if (op->getBlock())
       getOrCreate<Executable>(getProgramPointBefore(op->getBlock()))
           ->blockContentSubscribe(this);
@@ -246,21 +246,21 @@ LogicalResult DeadCodeAnalysis::initializeRecursively(Operation *op) {
   }
   // Recurse on nested operations.
   for (Region &region : op->getRegions()) {
-    LDBG("[init] Recursing into region of op: " << op->getName());
+    LDBG() << "[init] Recursing into region of op: " << op->getName();
     for (Operation &nestedOp : region.getOps()) {
-      LDBG("[init] Recursing into nested op: " << nestedOp.getName() << " at "
-                                               << &nestedOp);
+      LDBG() << "[init] Recursing into nested op: " << nestedOp.getName()
+             << " at " << &nestedOp;
       if (failed(initializeRecursively(&nestedOp)))
         return failure();
     }
   }
-  LDBG("[init] Finished initializeRecursively for op: " << op->getName()
-                                                        << " at " << op);
+  LDBG() << "[init] Finished initializeRecursively for op: " << op->getName()
+         << " at " << op;
   return success();
 }
 
 void DeadCodeAnalysis::markEdgeLive(Block *from, Block *to) {
-  LDBG("Marking edge live from block " << from << " to block " << to);
+  LDBG() << "Marking edge live from block " << from << " to block " << to;
   auto *state = getOrCreate<Executable>(getProgramPointBefore(to));
   propagateIfChanged(state, state->setToLive());
   auto *edgeState =
@@ -269,35 +269,35 @@ void DeadCodeAnalysis::markEdgeLive(Block *from, Block *to) {
 }
 
 void DeadCodeAnalysis::markEntryBlocksLive(Operation *op) {
-  LDBG("Marking entry blocks live for op: " << op->getName());
+  LDBG() << "Marking entry blocks live for op: " << op->getName();
   for (Region &region : op->getRegions()) {
     if (region.empty())
       continue;
     auto *state =
         getOrCreate<Executable>(getProgramPointBefore(&region.front()));
     propagateIfChanged(state, state->setToLive());
-    LDBG("Marked entry block live for region in op: " << op->getName());
+    LDBG() << "Marked entry block live for region in op: " << op->getName();
   }
 }
 
 LogicalResult DeadCodeAnalysis::visit(ProgramPoint *point) {
-  LDBG("Visiting program point: " << point << " " << *point);
+  LDBG() << "Visiting program point: " << point << " " << *point;
   if (point->isBlockStart())
     return success();
   Operation *op = point->getPrevOp();
-  LDBG("Visiting operation: " << *op);
+  LDBG() << "Visiting operation: " << *op;
 
   // If the parent block is not executable, there is nothing to do.
   if (op->getBlock() != nullptr &&
       !getOrCreate<Executable>(getProgramPointBefore(op->getBlock()))
            ->isLive()) {
-    LDBG("Parent block not live, skipping op: " << *op);
+    LDBG() << "Parent block not live, skipping op: " << *op;
     return success();
   }
 
   // We have a live call op. Add this as a live predecessor of the callee.
   if (auto call = dyn_cast<CallOpInterface>(op)) {
-    LDBG("Visiting call operation: " << *op);
+    LDBG() << "Visiting call operation: " << *op;
     visitCallOperation(call);
   }
 
@@ -305,12 +305,12 @@ LogicalResult DeadCodeAnalysis::visit(ProgramPoint *point) {
   if (op->getNumRegions()) {
     // Check if we can reason about the region control-flow.
     if (auto branch = dyn_cast<RegionBranchOpInterface>(op)) {
-      LDBG("Visiting region branch operation: " << *op);
+      LDBG() << "Visiting region branch operation: " << *op;
       visitRegionBranchOperation(branch);
 
       // Check if this is a callable operation.
     } else if (auto callable = dyn_cast<CallableOpInterface>(op)) {
-      LDBG("Visiting callable operation: " << *op);
+      LDBG() << "Visiting callable operation: " << *op;
       const auto *callsites = getOrCreateFor<PredecessorState>(
           getProgramPointAfter(op), getProgramPointAfter(callable));
 
@@ -322,19 +322,19 @@ LogicalResult DeadCodeAnalysis::visit(ProgramPoint *point) {
 
       // Otherwise, conservatively mark all entry blocks as executable.
     } else {
-      LDBG("Marking all entry blocks live for op: " << *op);
+      LDBG() << "Marking all entry blocks live for op: " << *op;
       markEntryBlocksLive(op);
     }
   }
 
   if (isRegionOrCallableReturn(op)) {
     if (auto branch = dyn_cast<RegionBranchOpInterface>(op->getParentOp())) {
-      LDBG("Visiting region terminator: " << *op);
+      LDBG() << "Visiting region terminator: " << *op;
       // Visit the exiting terminator of a region.
       visitRegionTerminator(op, branch);
     } else if (auto callable =
                    dyn_cast<CallableOpInterface>(op->getParentOp())) {
-      LDBG("Visiting callable terminator: " << *op);
+      LDBG() << "Visiting callable terminator: " << *op;
       // Visit the exiting terminator of a callable.
       visitCallableTerminator(op, callable);
     }
@@ -343,12 +343,12 @@ LogicalResult DeadCodeAnalysis::visit(ProgramPoint *point) {
   if (op->getNumSuccessors()) {
     // Check if we can reason about the control-flow.
     if (auto branch = dyn_cast<BranchOpInterface>(op)) {
-      LDBG("Visiting branch operation: " << *op);
+      LDBG() << "Visiting branch operation: " << *op;
       visitBranchOperation(branch);
 
       // Otherwise, conservatively mark all successors as exectuable.
     } else {
-      LDBG("Marking all successors live for op: " << *op);
+      LDBG() << "Marking all successors live for op: " << *op;
       for (Block *successor : op->getSuccessors())
         markEdgeLive(op->getBlock(), successor);
     }
@@ -358,7 +358,7 @@ LogicalResult DeadCodeAnalysis::visit(ProgramPoint *point) {
 }
 
 void DeadCodeAnalysis::visitCallOperation(CallOpInterface call) {
-  LDBG("visitCallOperation: " << call.getOperation()->getName());
+  LDBG() << "visitCallOperation: " << call.getOperation()->getName();
   Operation *callableOp = call.resolveCallableInTable(&symbolTable);
 
   // A call to a externally-defined callable has unknown predecessors.
@@ -381,15 +381,15 @@ void DeadCodeAnalysis::visitCallOperation(CallOpInterface call) {
     auto *callsites =
         getOrCreate<PredecessorState>(getProgramPointAfter(callableOp));
     propagateIfChanged(callsites, callsites->join(call));
-    LDBG("Added callsite as predecessor for callable: "
-         << callableOp->getName());
+    LDBG() << "Added callsite as predecessor for callable: "
+           << callableOp->getName();
   } else {
     // Mark this call op's predecessors as overdefined.
     auto *predecessors =
         getOrCreate<PredecessorState>(getProgramPointAfter(call));
     propagateIfChanged(predecessors, predecessors->setHasUnknownPredecessors());
-    LDBG("Marked call op's predecessors as unknown for: "
-         << call.getOperation()->getName());
+    LDBG() << "Marked call op's predecessors as unknown for: "
+           << call.getOperation()->getName();
   }
 }
 
@@ -421,7 +421,7 @@ DeadCodeAnalysis::getOperandValues(Operation *op) {
 }
 
 void DeadCodeAnalysis::visitBranchOperation(BranchOpInterface branch) {
-  LDBG("visitBranchOperation: " << branch.getOperation()->getName());
+  LDBG() << "visitBranchOperation: " << branch.getOperation()->getName();
   // Try to deduce a single successor for the branch.
   std::optional<SmallVector<Attribute>> operands = getOperandValues(branch);
   if (!operands)
@@ -429,18 +429,18 @@ void DeadCodeAnalysis::visitBranchOperation(BranchOpInterface branch) {
 
   if (Block *successor = branch.getSuccessorForOperands(*operands)) {
     markEdgeLive(branch->getBlock(), successor);
-    LDBG("Branch has single successor: " << successor);
+    LDBG() << "Branch has single successor: " << successor;
   } else {
     // Otherwise, mark all successors as executable and outgoing edges.
     for (Block *successor : branch->getSuccessors())
       markEdgeLive(branch->getBlock(), successor);
-    LDBG("Branch has multiple/all successors live");
+    LDBG() << "Branch has multiple/all successors live";
   }
 }
 
 void DeadCodeAnalysis::visitRegionBranchOperation(
     RegionBranchOpInterface branch) {
-  LDBG("visitRegionBranchOperation: " << branch.getOperation()->getName());
+  LDBG() << "visitRegionBranchOperation: " << branch.getOperation()->getName();
   // Try to deduce which regions are executable.
   std::optional<SmallVector<Attribute>> operands = getOperandValues(branch);
   if (!operands)
@@ -457,19 +457,19 @@ void DeadCodeAnalysis::visitRegionBranchOperation(
     // Mark the entry block as executable.
     auto *state = getOrCreate<Executable>(point);
     propagateIfChanged(state, state->setToLive());
-    LDBG("Marked region successor live: " << point);
+    LDBG() << "Marked region successor live: " << point;
     // Add the parent op as a predecessor.
     auto *predecessors = getOrCreate<PredecessorState>(point);
     propagateIfChanged(
         predecessors,
         predecessors->join(branch, successor.getSuccessorInputs()));
-    LDBG("Added region branch as predecessor for successor: " << point);
+    LDBG() << "Added region branch as predecessor for successor: " << point;
   }
 }
 
 void DeadCodeAnalysis::visitRegionTerminator(Operation *op,
                                              RegionBranchOpInterface branch) {
-  LDBG("visitRegionTerminator: " << *op);
+  LDBG() << "visitRegionTerminator: " << *op;
   std::optional<SmallVector<Attribute>> operands = getOperandValues(op);
   if (!operands)
     return;
@@ -488,7 +488,7 @@ void DeadCodeAnalysis::visitRegionTerminator(Operation *op,
       auto *state =
           getOrCreate<Executable>(getProgramPointBefore(&region->front()));
       propagateIfChanged(state, state->setToLive());
-      LDBG("Marked region entry block live for region: " << region);
+      LDBG() << "Marked region entry block live for region: " << region;
       predecessors = getOrCreate<PredecessorState>(
           getProgramPointBefore(&region->front()));
     } else {
@@ -498,14 +498,14 @@ void DeadCodeAnalysis::visitRegionTerminator(Operation *op,
     }
     propagateIfChanged(predecessors,
                        predecessors->join(op, successor.getSuccessorInputs()));
-    LDBG("Added region terminator as predecessor for successor: "
-         << (successor.getSuccessor() ? "region entry" : "parent op"));
+    LDBG() << "Added region terminator as predecessor for successor: "
+           << (successor.getSuccessor() ? "region entry" : "parent op");
   }
 }
 
 void DeadCodeAnalysis::visitCallableTerminator(Operation *op,
                                                CallableOpInterface callable) {
-  LDBG("visitCallableTerminator: " << *op);
+  LDBG() << "visitCallableTerminator: " << *op;
   // Add as predecessors to all callsites this return op.
   auto *callsites = getOrCreateFor<PredecessorState>(
       getProgramPointAfter(op), getProgramPointAfter(callable));
@@ -516,15 +516,15 @@ void DeadCodeAnalysis::visitCallableTerminator(Operation *op,
         getOrCreate<PredecessorState>(getProgramPointAfter(predecessor));
     if (canResolve) {
       propagateIfChanged(predecessors, predecessors->join(op));
-      LDBG("Added callable terminator as predecessor for callsite: "
-           << predecessor->getName());
+      LDBG() << "Added callable terminator as predecessor for callsite: "
+             << predecessor->getName();
     } else {
       // If the terminator is not a return-like, then conservatively assume we
       // can't resolve the predecessor.
       propagateIfChanged(predecessors,
                          predecessors->setHasUnknownPredecessors());
-      LDBG("Could not resolve callable terminator for callsite: "
-           << predecessor->getName());
+      LDBG() << "Could not resolve callable terminator for callsite: "
+             << predecessor->getName();
     }
   }
 }
diff --git a/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp b/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp
index 6a12fe3acc2c2..94d52215b8f14 100644
--- a/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp
+++ b/mlir/lib/Analysis/DataFlow/LivenessAnalysis.cpp
@@ -10,7 +10,7 @@
 #include <cassert>
 #include <mlir/Analysis/DataFlow/LivenessAnalysis.h>
 
-#include <llvm/Support/Debug.h>
+#include <llvm/Support/DebugLog.h>
 #include <mlir/Analysis/DataFlow/SparseAnalysis.h>
 #include <mlir/Analysis/DataFlow/Utils.h>
 #include <mlir/Analysis/DataFlowFramework.h>
@@ -22,7 +22,6 @@
 
 #define DEBUG_TYPE "liveness-analysis"
 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 using namespace mlir;
 using namespace mlir::dataflow;
@@ -86,11 +85,11 @@ LivenessAnalysis::visitOperation(Operation *op, ArrayRef<Liveness *> operands,
              llvm::dbgs() << "\n");
   // This marks values of type (1.a) and (4) liveness as "live".
   if (!isMemoryEffectFree(op) || op->hasTrait<OpTrait::ReturnLike>()) {
-    LDBG("[visitOperation] Operation has memory effects or is "
-         "return-like, marking operands live");
+    LDBG() << "[visitOperation] Operation has memory effects or is "
+              "return-like, marking operands live";
     for (auto *operand : operands) {
-      LDBG(" [visitOperation] Marking operand live: "
-           << operand << " (" << operand->isLive << ")");
+      LDBG() << " [visitOperation] Marking operand live: " << operand << " ("
+             << operand->isLive << ")";
       propagateIfChanged(operand, operand->markLive());
     }
   }
@@ -99,28 +98,28 @@ LivenessAnalysis::visitOperation(Operation *op, ArrayRef<Liveness *> operands,
   bool foundLiveResult = false;
   for (const Liveness *r : results) {
     if (r->isLive && !foundLiveResult) {
-      LDBG("[visitOperation] Found live result, "
-           "meeting all operands with result: "
-           << r);
+      LDBG() << "[visitOperation] Found live result, "
+                "meeting all operands with result: "
+             << r;
       // It is assumed that each operand is used to compute each result of an
       // op. Thus, if at least one result is live, each operand is live.
       for (Liveness *operand : operands) {
-        LDBG(" [visitOperation] Meeting operand: " << operand
-                                                   << " with result: " << r);
+        LDBG() << " [visitOperation] Meeting operand: " << operand
+               << " with result: " << r;
         meet(operand, *r);
       }
       foundLiveResult = true;
     }
-    LDBG("[visitOperation] Adding dependency for result: " << r << " after op: "
-                                                           << *op);
+    LDBG() << "[visitOperation] Adding dependency for result: " << r
+           << " after op: " << *op;
     addDependency(const_cast<Liveness *>(r), getProgramPointAfter(op));
   }
   return success();
 }
 
 void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
-  LDBG("Visiting branch operand: " << operand.get()
-                                   << " in op: " << *operand.getOwner());
+  LDBG() << "Visiting branch operand: " << operand.get()
+         << " in op: " << *operand.getOwner();
   // We know (at the moment) and assume (for the future) that `operand` is a
   // non-forwarded branch operand of a `RegionBranchOpInterface`,
   // `BranchOpInterface`, `RegionBranchTerminatorOpInterface` or return-like op.
@@ -152,9 +151,9 @@ void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
       for (Value result : op->getResults()) {
         if (getLatticeElement(result)->isLive) {
           mayLive = true;
-          LDBG("[visitBranchOperand] Non-forwarded branch "
-               "operand may be live due to live result: "
-               << result);
+          LDBG() << "[visitBranchOperand] Non-forwarded branch "
+                    "operand may be live due to live result: "
+                 << result;
           break;
         }
       }
@@ -174,8 +173,8 @@ void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
     // Therefore, we conservatively consider the non-forwarded operand of the
     // branch operation may live.
     mayLive = true;
-    LDBG("[visitBranchOperand] Non-forwarded branch operand may "
-         "be live due to branch op interface");
+    LDBG() << "[visitBranchOperand] Non-forwarded branch operand may "
+              "be live due to branch op interface";
   } else {
     Operation *parentOp = op->getParentOp();
     assert(isa<RegionBranchOpInterface>(parentOp) &&
@@ -191,9 +190,9 @@ void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
       for (Value result : parentOp->getResults()) {
         if (getLatticeElement(result)->isLive) {
           mayLive = true;
-          LDBG("[visitBranchOperand] Non-forwarded branch "
-               "operand may be live due to parent live result: "
-               << result);
+          LDBG() << "[visitBranchOperand] Non-forwarded branch "
+                    "operand may be live due to parent live result: "
+                 << result;
           break;
         }
       }
@@ -214,9 +213,9 @@ void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
     for (Operation &nestedOp : *block) {
       if (!isMemoryEffectFree(&nestedOp)) {
         mayLive = true;
-        LDBG("Non-forwarded branch operand may be "
-             "live due to memory effect in block: "
-             << block);
+        LDBG() << "Non-forwarded branch operand may be "
+                  "live due to memory effect in block: "
+               << block;
         break;
       }
     }
@@ -224,7 +223,7 @@ void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
 
   if (mayLive) {
     Liveness *operandLiveness = getLatticeElement(operand.get());
-    LDBG("Marking branch operand live: " << operand.get());
+    LDBG() << "Marking branch operand live: " << operand.get();
     propagateIfChanged(operandLiveness, operandLiveness->markLive());
   }
 
@@ -236,7 +235,7 @@ void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
   SmallVector<const Liveness *, 4> resultsLiveness;
   for (const Value result : op->getResults())
     resultsLiveness.push_back(getLatticeElement(result));
-  LDBG("Visiting operation for non-forwarded branch operand: " << *op);
+  LDBG() << "Visiting operation for non-forwarded branch operand: " << *op;
   (void)visitOperation(op, operandLiveness, resultsLiveness);
 
   // We also visit the parent op with the parent's results and this operand if
@@ -249,14 +248,14 @@ void LivenessAnalysis::visitBranchOperand(OpOperand &operand) {
   SmallVector<const Liveness *, 4> parentResultsLiveness;
   for (const Value parentResult : parentOp->getResults())
     parentResultsLiveness.push_back(getLatticeElement(parentResult));
-  LDBG("Visiting parent operation for non-forwarded branch operand: "
-       << *parentOp);
+  LDBG() << "Visiting parent operation for non-forwarded branch operand: "
+         << *parentOp;
   (void)visitOperation(parentOp, operandLiveness, parentResultsLiveness);
 }
 
 void LivenessAnalysis::visitCallOperand(OpOperand &operand) {
-  LDBG("Visiting call operand: " << operand.get()
-                                 << " in op: " << *operand.getOwner());
+  LDBG() << "Visiting call operand: " << operand.get()
+         << " in op: " << *operand.getOwner();
   // We know (at the moment) and assume (for the future) that `operand` is a
   // non-forwarded call operand of an op implementing `CallOpInterface`.
   assert(isa<CallOpInterface>(operand.getOwner()) &&
@@ -269,18 +268,18 @@ void LivenessAnalysis::visitCallOperand(OpOperand &operand) {
   // This marks values of type (1.c) liveness as "live". A non-forwarded
   // call operand is live.
   Liveness *operandLiveness = getLatticeElement(operand.get());
-  LDBG("Marking call operand live: " << operand.get());
+  LDBG() << "Marking call operand live: " << operand.get();
   propagateIfChanged(operandLiveness, operandLiveness->markLive());
 }
 
 void LivenessAnalysis::setToExitState(Liveness *lattice) {
-  LDBG("setToExitState for lattice: " << lattice);
+  LDBG() << "setToExitState for lattice: " << lattice;
   if (lattice->isLive) {
-    LDBG("Lattice already live, nothing to do");
+    LDBG() << "Lattice already live, nothing to do";
     return;
   }
   // This marks values of type (2) liveness as "live".
-  LDBG("Marking lattice live due to exit state");
+  LDBG() << "Marking lattice live due to exit state";
   (void)lattice->markLive();
   propagateIfChanged(lattice, ChangeResult::Change);
 }
@@ -290,14 +289,14 @@ void LivenessAnalysis::setToExitState(Liveness *lattice) {
 //===----------------------------------------------------------------------===//
 
 RunLivenessAnalysis::RunLivenessAnalysis(Operation *op) {
-  LDBG("Constructing RunLivenessAnalysis for op: " << op->getName());
+  LDBG() << "Constructing RunLivenessAnalysis for op: " << op->getName();
   SymbolTableCollection symbolTable;
 
   loadBaselineAnalyses(solver);
   solver.load<LivenessAnalysis>(symbolTable);
-  LDBG("Initializing and running solver");
+  LDBG() << "Initializing and running solver";
   (void)solver.initializeAndRun(op);
-  LDBG("Dumping liveness state for op");
+  LDBG() << "Dumping liveness state for op";
 }
 
 const Liveness *RunLivenessAnalysis::getLiveness(Value val) {
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 825f63e7147c8..994d48505d24f 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -18,7 +18,6 @@
 #include "mlir/IR/Value.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "llvm/ADT/ScopeExit.h"
-#include "llvm/Support/Debug.h"
 
 //===----------------------------------------------------------------------===//
 // BufferizableOpInterface
@@ -35,8 +34,6 @@ namespace bufferization {
 MLIR_DEFINE_EXPLICIT_TYPE_ID(mlir::bufferization::AnalysisState)
 
 #define DEBUG_TYPE "bufferizable-op-interface"
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << (X))
 
 using namespace mlir;
 using namespace bufferization;
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OptimizeAllocationLiveness.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OptimizeAllocationLiveness.cpp
index 605a48709fe84..b8ddee6e935f3 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OptimizeAllocationLiveness.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OptimizeAllocationLiveness.cpp
@@ -18,11 +18,9 @@
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/Value.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 
 #define DEBUG_TYPE "optimize-allocation-liveness"
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 namespace mlir {
 namespace bufferization {
@@ -65,8 +63,8 @@ Operation *findUserWithFreeSideEffect(Value value) {
       for (const auto &effect : effects) {
         if (isa<MemoryEffects::Free>(effect.getEffect())) {
           if (freeOpUser) {
-            LDBG("Multiple users with free effect found: " << *freeOpUser
-                                                           << " and " << *user);
+            LDBG() << "Multiple users with free effect found: " << *freeOpUser
+                   << " and " << *user;
             return nullptr;
           }
           freeOpUser = user;
@@ -121,7 +119,7 @@ struct OptimizeAllocationLiveness
         return WalkResult::advance();
 
       auto allocOp = memEffectOp;
-      LDBG("Checking alloc op: " << allocOp);
+      LDBG() << "Checking alloc op: " << allocOp;
 
       SmallVector<OpResult> allocationResults = collectAllocations(allocOp);
       // Multiple allocations from a single op are not considered here yet.
@@ -129,7 +127,7 @@ struct OptimizeAllocationLiveness
         return WalkResult::advance();
 
       OpResult allocResult = allocationResults[0];
-      LDBG("On allocation result: " << allocResult);
+      LDBG() << "On allocation result: " << allocResult;
 
       auto *deallocOp = findUserWithFreeSideEffect(allocResult);
       if (!deallocOp || (deallocOp->getBlock() != allocOp->getBlock())) {
@@ -159,12 +157,12 @@ struct OptimizeAllocationLiveness
       if (lastUser == nullptr) {
         return WalkResult::advance();
       }
-      LDBG("Last user found: " << *lastUser);
+      LDBG() << "Last user found: " << *lastUser;
       assert(lastUser->getBlock() == allocOp->getBlock());
       assert(lastUser->getBlock() == deallocOp->getBlock());
       // Move the dealloc op after the last user.
       deallocOp->moveAfter(lastUser);
-      LDBG("Moved dealloc op after: " << *lastUser);
+      LDBG() << "Moved dealloc op after: " << *lastUser;
 
       return WalkResult::advance();
     });
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index 1d8279c3199ea..21cb2f6d0a8e0 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -39,7 +39,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TypeSwitch.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InterleavedRange.h"
 #include "llvm/Support/LogicalResult.h"
@@ -51,11 +51,6 @@ using namespace mlir::transform;
 using namespace mlir::transform::gpu;
 
 #define DEBUG_TYPE "gpu-transforms"
-#define DEBUG_TYPE_ALIAS "gpu-transforms-alias"
-
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
-#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
 
 //===----------------------------------------------------------------------===//
 // Apply...ConversionPatternsOp
@@ -471,7 +466,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
     RewriterBase &rewriter, std::optional<TransformOpInterface> transformOp,
     scf::ForallOp forallOp, ArrayRef<int64_t> availableMappingSizes,
     ForallRewriteResult &result, const GpuIdBuilder &gpuIdBuilder) {
-  LDBG("--start rewriteOneForallCommonImpl");
+  LDBG() << "--start rewriteOneForallCommonImpl";
 
   // Step 1. Complete the mapping to a full mapping (with 1s) if necessary.
   auto numParallelIterations =
@@ -506,14 +501,14 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
     // Otherwise, we have a new insertion without a size -> use size 1.
     tmpMappingSizes.push_back(1);
   }
-  LDBG("----tmpMappingSizes extracted from scf.forall op: "
-       << llvm::interleaved(tmpMappingSizes));
+  LDBG() << "----tmpMappingSizes extracted from scf.forall op: "
+         << llvm::interleaved(tmpMappingSizes);
 
   // Step 2. sort the values by the corresponding DeviceMappingAttrInterface.
   SmallVector<int64_t> forallMappingSizes = getValuesSortedByKey(
       forallMappingAttrs.getArrayRef(), tmpMappingSizes, comparator);
-  LDBG("----forallMappingSizes: " << llvm::interleaved(forallMappingSizes));
-  LDBG("----forallMappingAttrs: " << llvm::interleaved(forallMappingAttrs));
+  LDBG() << "----forallMappingSizes: " << llvm::interleaved(forallMappingSizes);
+  LDBG() << "----forallMappingAttrs: " << llvm::interleaved(forallMappingAttrs);
 
   // Step 3. Generate the mappingIdOps using the provided generator.
   Location loc = forallOp.getLoc();
@@ -522,24 +517,24 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
   SmallVector<int64_t> originalBasis(availableMappingSizes);
   bool originalBasisWasProvided = !originalBasis.empty();
   if (!originalBasisWasProvided) {
-    LDBG("----originalBasis was not provided, deriving it and there will be no "
-         "predication");
+    LDBG() << "----originalBasis was not provided, deriving it and there will "
+              "be no "
+              "predication";
     originalBasis = forallMappingSizes;
     while (originalBasis.size() < 3)
       originalBasis.push_back(1);
   } else {
-    LDBG("----originalBasis was provided, using it, there will be predication");
+    LDBG() << "----originalBasis was provided, using it, there will be "
+              "predication";
   }
-  LLVM_DEBUG(
-      llvm::interleaveComma(originalBasis, DBGS() << "------originalBasis: ");
-      llvm::dbgs() << "\n");
+  LDBG() << "------originalBasis: " << llvm::interleaved(originalBasis);
 
   IdBuilderResult builderResult =
       gpuIdBuilder.idBuilder(rewriter, loc, forallMappingSizes, originalBasis);
   if (!builderResult.errorMsg.empty())
     return definiteFailureHelper(transformOp, forallOp, builderResult.errorMsg);
 
-  LLVM_DEBUG(DBGS() << builderResult);
+  LDBG() << builderResult;
 
   // Step 4. Map the induction variables to the mappingIdOps, this may involve
   // a permutation.
@@ -550,7 +545,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
            forallMappingAttrs.getArrayRef().take_front(forallOp.getRank()))) {
     auto mappingAttr = cast<DeviceMappingAttrInterface>(dim);
     Value peIdOp = mappingIdOps[mappingAttr.getRelativeIndex()];
-    LDBG("----map: " << iv << " to " << peIdOp);
+    LDBG() << "----map: " << iv << " to " << peIdOp;
     bvm.map(iv, peIdOp);
   }
 
@@ -596,9 +591,9 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
   // Step 8. Erase old op.
   rewriter.eraseOp(forallOp);
 
-  LDBG("----result forallMappingSizes: "
-       << llvm::interleaved(forallMappingSizes));
-  LDBG("----result mappingIdOps: " << llvm::interleaved(mappingIdOps));
+  LDBG() << "----result forallMappingSizes: "
+         << llvm::interleaved(forallMappingSizes);
+  LDBG() << "----result mappingIdOps: " << llvm::interleaved(mappingIdOps);
 
   result = ForallRewriteResult{forallMappingSizes, mappingIdOps};
   return DiagnosedSilenceableFailure::success();
@@ -612,7 +607,7 @@ DiagnosedSilenceableFailure mlir::transform::gpu::mapForallToBlocksImpl(
     RewriterBase &rewriter, TransformOpInterface transformOp,
     scf::ForallOp forallOp, SmallVectorImpl<int64_t> &gridDims,
     const GpuIdBuilder &gpuIdBuilder) {
-  LDBG("Start mapForallToBlocksImpl");
+  LDBG() << "Start mapForallToBlocksImpl";
 
   {
     // GPU-specific verifications. There is no better place to anchor
@@ -893,7 +888,7 @@ DiagnosedSilenceableFailure mlir::transform::gpu::mapNestedForallToThreadsImpl(
     RewriterBase &rewriter, std::optional<TransformOpInterface> transformOp,
     Operation *target, ArrayRef<int64_t> blockDims, int64_t warpSize,
     bool syncAfterDistribute) {
-  LDBG("Start mapNestedForallToThreadsImpl");
+  LDBG() << "Start mapNestedForallToThreadsImpl";
   if (blockDims.size() != 3) {
     return definiteFailureHelper(transformOp, target,
                                  "requires size-3 thread mapping");
diff --git a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
index 2fba09b001ffb..d73bedd759d0f 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
@@ -36,10 +36,6 @@ using namespace mlir::transform::gpu;
 
 #define DEBUG_TYPE "gpu-transforms"
 
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
-#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
-
 /// Build predicates to filter execution by only the activeIds. Along each
 /// dimension, 3 cases appear:
 ///   1. activeMappingSize > availableMappingSize: this is an unsupported case
@@ -54,15 +50,9 @@ buildPredicates(RewriterBase &rewriter, Location loc, ArrayRef<Value> activeIds,
                 ArrayRef<int64_t> activeMappingSizes,
                 ArrayRef<int64_t> availableMappingSizes,
                 std::string &errorMsg) {
-  // clang-format off
-  LLVM_DEBUG(
-    llvm::interleaveComma(
-      activeMappingSizes, DBGS() << "----activeMappingSizes: ");
-    DBGS() << "\n";
-    llvm::interleaveComma(
-      availableMappingSizes, DBGS() << "----availableMappingSizes: ");
-    DBGS() << "\n";);
-  // clang-format on
+  LDBG() << "----activeMappingSizes: " << llvm::interleaved(activeMappingSizes);
+  LDBG() << "----availableMappingSizes: "
+         << llvm::interleaved(availableMappingSizes);
 
   SmallVector<Value> predicateOps;
   for (auto [activeId, activeMappingSize, availableMappingSize] :
@@ -88,10 +78,8 @@ buildPredicates(RewriterBase &rewriter, Location loc, ArrayRef<Value> activeIds,
 template <typename ThreadOrBlockIdOp>
 static Value buildLinearId(RewriterBase &rewriter, Location loc,
                            ArrayRef<OpFoldResult> originalBasisOfr) {
-  LLVM_DEBUG(llvm::interleaveComma(
-                 originalBasisOfr,
-                 DBGS() << "----buildLinearId with originalBasisOfr:  ");
-             llvm::dbgs() << "\n");
+  LDBG() << "----buildLinearId with originalBasisOfr:  "
+         << llvm::interleaved(originalBasisOfr);
   assert(originalBasisOfr.size() == 3 && "expected 3 sizes");
   IndexType indexType = rewriter.getIndexType();
   AffineExpr tx, ty, tz, bdx, bdy;
@@ -179,7 +167,7 @@ commonLinearIdBuilderFn(int64_t multiplicity = 1,
     if (mask) {
       Value isActiveIdPredicate =
           mask.createIsActiveIdPredicate(rewriter, scaledLinearIdI64);
-      LDBG("------adjusting predicate with mask: " << isActiveIdPredicate);
+      LDBG() << "------adjusting predicate with mask: " << isActiveIdPredicate;
       predicateOps.push_back(isActiveIdPredicate);
     } else {
       // 4.b. Otherwise, handle predicates using physicalLinearId.
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp b/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
index ce1b1b91b7140..dd5476688ca37 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
@@ -22,7 +22,6 @@ using namespace mlir;
 
 #define DEBUG_TYPE "linalg-transforms"
 #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 static Attribute linearId0(MLIRContext *ctx) {
   return gpu::GPUThreadMappingAttr::get(ctx, gpu::MappingId::LinearDim0);
@@ -43,9 +42,8 @@ transform::gpu::CopyMappingInfo::CopyMappingInfo(MLIRContext *ctx,
   assert(!copySizes.empty() && copySizes.size() <= 3 &&
          "only 1,2,3-D copies are supported for now");
 
-  LDBG("START CopyMappingInfo, favorPredication: " << favorPredication);
-  LLVM_DEBUG(DBGS() << "--copy shape: " << llvm::interleaved(copySizes)
-                    << "\n");
+  LDBG() << "START CopyMappingInfo, favorPredication: " << favorPredication;
+  LDBG() << "--copy shape: " << llvm::interleaved(copySizes);
 
   // Greedily find the largest vector size that can be used to copy the most
   // minor dimension: we are in the business of filling kMaxVectorLoadBitWidth
@@ -53,20 +51,19 @@ transform::gpu::CopyMappingInfo::CopyMappingInfo(MLIRContext *ctx,
   int64_t desiredVectorSize = CopyMappingInfo::maxContiguousElementsToTransfer(
       desiredBitAlignment, copySizes.back(), elementalBitwidth);
 
-  LDBG("--greedily determined vectorSize: "
-       << desiredVectorSize << " elements of " << elementalBitwidth
-       << "b each -> " << (desiredVectorSize * elementalBitwidth)
-       << "b total out of a max of " << kMaxVectorLoadBitWidth << "b");
+  LDBG() << "--greedily determined vectorSize: " << desiredVectorSize
+         << " elements of " << elementalBitwidth << "b each -> "
+         << (desiredVectorSize * elementalBitwidth)
+         << "b total out of a max of " << kMaxVectorLoadBitWidth << "b";
 
   status = inferNumThreads(totalNumThreads, copySizes, desiredVectorSize,
                            favorPredication);
   if (status == Status::Invalid)
     return;
 
-  LLVM_DEBUG(DBGS() << "--copy: " << llvm::interleaved(copySizes) << "\n"
-                    << "--numThreads: " << llvm::interleaved(this->numThreads)
-                    << "\n"
-                    << "--vectorSize: " << this->vectorSize << "\n");
+  LDBG() << "--copy: " << llvm::interleaved(copySizes) << "\n"
+         << "--numThreads: " << llvm::interleaved(this->numThreads) << "\n"
+         << "--vectorSize: " << this->vectorSize;
   assert(this->numThreads.size() == copySizes.size() &&
          "compute copy mapping expected same number of threads and copy sizes");
 
@@ -140,7 +137,7 @@ static SmallVector<int64_t> maximizeNumThreads(ArrayRef<int64_t> sizes,
          "currentIndex out of bounds");
   std::string indent(2 * currentIndex, '-');
   if (static_cast<size_t>(currentIndex) == sizes.size() - 1) {
-    LDBG(indent << "mandated globalBest: " << sizes[currentIndex]);
+    LDBG() << indent << "mandated globalBest: " << sizes[currentIndex];
     return SmallVector<int64_t>{sizes[currentIndex]};
   }
 
@@ -149,16 +146,16 @@ static SmallVector<int64_t> maximizeNumThreads(ArrayRef<int64_t> sizes,
   SmallVector<int64_t> factors = getFactors(s);
   SmallVector<int64_t> localThreadsPerDim;
   localThreadsPerDim.reserve(sizes.size());
-  LDBG(indent << "maximizeNumThreads in " << s
-              << " with limit: " << maxNumThreads);
+  LDBG() << indent << "maximizeNumThreads in " << s
+         << " with limit: " << maxNumThreads;
   for (auto factor : factors) {
     auto nestedThreadsPerDim =
         maximizeNumThreads(sizes, currentIndex + 1, maxNumThreads / factor);
     int64_t localBest = factor * product(nestedThreadsPerDim);
     if (localBest > best && localBest <= maxNumThreads) {
-      LDBG(indent << "new localBest: " << localBest);
-      LDBG(indent << "nestedThreadsPerDim: "
-                  << llvm::interleaved(nestedThreadsPerDim));
+      LDBG() << indent << "new localBest: " << localBest;
+      LDBG() << indent << "nestedThreadsPerDim: "
+             << llvm::interleaved(nestedThreadsPerDim);
       localThreadsPerDim.clear();
       localThreadsPerDim.push_back(factor);
       llvm::append_range(localThreadsPerDim, nestedThreadsPerDim);
@@ -166,8 +163,8 @@ static SmallVector<int64_t> maximizeNumThreads(ArrayRef<int64_t> sizes,
     }
   }
 
-  LDBG(indent << "found globalBest: " << best);
-  LDBG(indent << "numThreads: " << llvm::interleaved(localThreadsPerDim));
+  LDBG() << indent << "found globalBest: " << best;
+  LDBG() << indent << "numThreads: " << llvm::interleaved(localThreadsPerDim);
   return localThreadsPerDim;
 }
 
@@ -192,8 +189,8 @@ transform::gpu::CopyMappingInfo::inferNumThreads(int64_t totalNumThreads,
       if (status == Status::Success || status == Status::Invalid)
         return status;
 
-      LDBG("requires predication, try reducing vector size to "
-           << (localVectorSize / 2));
+      LDBG() << "requires predication, try reducing vector size to "
+             << (localVectorSize / 2);
     }
   }
 
@@ -210,8 +207,8 @@ transform::gpu::CopyMappingInfo::inferNumThreadsImpl(
   assert(sizes.back() % desiredVectorSize == 0 &&
          "most-minor size not divisible by actualVectorSize");
 
-  LDBG("inferNumThreadsImpl with totalNumThreads: "
-       << totalNumThreads << " and vectorSize: " << desiredVectorSize);
+  LDBG() << "inferNumThreadsImpl with totalNumThreads: " << totalNumThreads
+         << " and vectorSize: " << desiredVectorSize;
 
   // Scale the most minor size to account for the chosen vector size and
   // maximize the number of threads without exceeding the total number of
@@ -219,22 +216,22 @@ transform::gpu::CopyMappingInfo::inferNumThreadsImpl(
   SmallVector<int64_t> scaledSizes(sizes);
   scaledSizes.back() /= desiredVectorSize;
   if (scaledSizes.back() > totalNumThreads) {
-    LDBG("--Too few threads given the required vector size -> FAIL");
+    LDBG() << "--Too few threads given the required vector size -> FAIL";
     return Status::Invalid;
   }
   SmallVector<int64_t> inferredNumThreads =
       maximizeNumThreads(scaledSizes, 0, totalNumThreads);
 
-  LDBG("inferred numThreads: " << llvm::interleaved(inferredNumThreads));
-  LDBG("computed actualVectorSize: " << desiredVectorSize);
+  LDBG() << "inferred numThreads: " << llvm::interleaved(inferredNumThreads);
+  LDBG() << "computed actualVectorSize: " << desiredVectorSize;
 
   // Corner case: we cannot use more threads than available. If the dimension of
   // the copy is so bad it is because higher-level tiling did not do its job, we
   // do not try to recover from it here.
   int64_t totalNumThreadsUsed = product(inferredNumThreads);
-  LDBG("--totalNumThreadsUsed: " << totalNumThreadsUsed);
+  LDBG() << "--totalNumThreadsUsed: " << totalNumThreadsUsed;
   if (totalNumThreadsUsed == 0 || totalNumThreadsUsed > totalNumThreads) {
-    LDBG("--Too few threads given the required vector size -> FAIL");
+    LDBG() << "--Too few threads given the required vector size -> FAIL";
     return Status::Invalid;
   }
 
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 9f523e9d78109..bb0861340ad92 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -40,7 +40,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/TypeSwitch.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/LogicalResult.h"
 #include <type_traits>
 
@@ -49,9 +49,6 @@ using namespace mlir::linalg;
 using namespace mlir::transform;
 
 #define DEBUG_TYPE "linalg-transforms"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define DBGSNL() (llvm::dbgs() << "\n")
-#define LDBG(X) LLVM_DEBUG(DBGS() << (X) << "\n")
 
 /// Attempts to apply the pattern specified as template argument to the given
 /// operation. The pattern is expected to have a `returningMatchAndRewrite`
@@ -773,7 +770,7 @@ static bool sameOrEquivalentIterArg(Value src, Value dst) {
 static std::tuple<SmallVector<Operation *>, Operation *>
 tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag,
                            Operation *producerOp, Operation *containingOp) {
-  LLVM_DEBUG(DBGS() << "Try to fuse a direct extract use\n");
+  LDBG() << "Try to fuse a direct extract use";
   auto tileableProducer = dyn_cast<TilingInterface>(producerOp);
   if (!tileableProducer) {
     diag.attachNote(producerOp->getLoc())
@@ -838,7 +835,7 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag,
   // Tile the producer.
   int64_t resultNumber =
       cast<OpResult>(sliceOpToTile.getSource()).getResultNumber();
-  LLVM_DEBUG(DBGS() << "resultNumber: " << resultNumber << "\n");
+  LDBG() << "resultNumber: " << resultNumber;
 
   SmallVector<OpFoldResult> offsets = sliceOpToTile.getMixedOffsets();
   SmallVector<OpFoldResult> sizes = sliceOpToTile.getMixedSizes();
@@ -855,7 +852,7 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag,
 
 #ifndef NDEBUG
   for (auto *tiledOp : tileAndFuseResult->tiledOps) {
-    LLVM_DEBUG(DBGS() << "tiledProducer: " << *tiledOp << "\n");
+    LDBG() << "tiledProducer: " << *tiledOp;
   }
 #endif
 
@@ -894,7 +891,7 @@ static SmallVector<Operation *>
 tileAndFuseFirstExtractUseThroughContainingOpBlockArgument(
     RewriterBase &rewriter, Diagnostic &diag, Operation *producerOp,
     Operation *containingOp) {
-  LLVM_DEBUG(DBGS() << "Try to fuse an extract use through block argument\n");
+  LDBG() << "Try to fuse an extract use through block argument";
 
   auto tileableProducer = dyn_cast<TilingInterface>(producerOp);
   if (!tileableProducer) {
@@ -947,7 +944,7 @@ tileAndFuseFirstExtractUseThroughContainingOpBlockArgument(
   // Replace the use in the tileableProducer before tiling: clone, replace and
   // then tile.
   int64_t resultNumber = cast<OpResult>(pUse->get()).getResultNumber();
-  LLVM_DEBUG(DBGS() << "resultNumber: " << resultNumber << "\n");
+  LDBG() << "resultNumber: " << resultNumber;
 
   // Gather destination tensors.
   SmallVector<Value> destinationTensors;
@@ -996,7 +993,7 @@ tileAndFuseFirstExtractUseThroughContainingOpBlockArgument(
 static Operation *cloneAndFuseFirstUse(RewriterBase &rewriter, Diagnostic &diag,
                                        Operation *producerOp,
                                        Operation *containingOp) {
-  LLVM_DEBUG(DBGS() << "Try to fuse an use by cloning\n");
+  LDBG() << "Try to fuse an use by cloning";
 
   // Gather all uses inside the containing op.
   SmallVector<OpOperand *> uses;
@@ -1030,7 +1027,7 @@ static Operation *cloneAndFuseFirstUse(RewriterBase &rewriter, Diagnostic &diag,
   assert(!isa<tensor::ParallelInsertSliceOp>(use->getOwner()) &&
          "Parallel insert slice is not a valid clone destination");
   unsigned resultNumber = cast<OpResult>(use->get()).getResultNumber();
-  LLVM_DEBUG(DBGS() << "resultNumber: " << resultNumber << "\n");
+  LDBG() << "resultNumber: " << resultNumber;
 
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(use->getOwner());
@@ -1113,7 +1110,7 @@ transform::FuseIntoContainingOp::apply(transform::TransformRewriter &rewriter,
     auto [tiledOps, newContainingOp] =
         tileAndFuseFirstExtractUse(rewriter, diag, producerOp, containingOp);
     if (!tiledOps.empty()) {
-      LLVM_DEBUG(DBGS() << "\nFused a direct extract use\n" << *containingOp);
+      LDBG() << "\nFused a direct extract use\n" << *containingOp;
       fusedOps.append(tiledOps);
       if (newContainingOp) {
         // Update handles associated with the containing op so we don't need to
@@ -1139,8 +1136,8 @@ transform::FuseIntoContainingOp::apply(transform::TransformRewriter &rewriter,
         tileAndFuseFirstExtractUseThroughContainingOpBlockArgument(
             rewriter, diag, producerOp, containingOp);
     if (!tiledContainingOpOperand.empty()) {
-      LLVM_DEBUG(DBGS() << "\nFused an extract use through block argument\n"
-                        << *containingOp);
+      LDBG() << "\nFused an extract use through block argument\n"
+             << *containingOp;
       fusedOps.append(tiledContainingOpOperand);
       continue;
     }
@@ -1148,7 +1145,7 @@ transform::FuseIntoContainingOp::apply(transform::TransformRewriter &rewriter,
     Operation *cloned =
         cloneAndFuseFirstUse(rewriter, diag, producerOp, containingOp);
     if (cloned) {
-      LLVM_DEBUG(DBGS() << "\nFused an use by cloning\n" << *containingOp);
+      LDBG() << "\nFused an use by cloning\n" << *containingOp;
       fusedOps.push_back(cloned);
       continue;
     }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 78c6bd13aba3a..cb6ca7022098a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -38,7 +38,8 @@
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TypeSwitch.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
+#include "llvm/Support/InterleavedRange.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <optional>
@@ -48,9 +49,6 @@ using namespace mlir::linalg;
 
 #define DEBUG_TYPE "linalg-vectorization"
 
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
-
 /// Try to vectorize `convOp` as a convolution.
 static FailureOr<Operation *>
 vectorizeConvolution(RewriterBase &rewriter, LinalgOp convOp,
@@ -403,12 +401,8 @@ LogicalResult VectorizationState::initState(RewriterBase &rewriter,
     scalableVecDims.append(linalgOp.getNumLoops(), false);
   }
 
-  LDBG("Canonical vector shape: ");
-  LLVM_DEBUG(llvm::interleaveComma(canonicalVecShape, llvm::dbgs()));
-  LLVM_DEBUG(llvm::dbgs() << "\n");
-  LDBG("Scalable vector dims: ");
-  LLVM_DEBUG(llvm::interleaveComma(scalableVecDims, llvm::dbgs()));
-  LLVM_DEBUG(llvm::dbgs() << "\n");
+  LDBG() << "Canonical vector shape: " << llvm::interleaved(canonicalVecShape);
+  LDBG() << "Scalable vector dims: " << llvm::interleaved(scalableVecDims);
 
   if (ShapedType::isDynamicShape(canonicalVecShape))
     return failure();
@@ -452,14 +446,14 @@ Value VectorizationState::getOrCreateMaskFor(
                       : AffineMap::getMultiDimIdentityMap(
                             linalgOp.getNumLoops(), rewriter.getContext());
 
-  LDBG("Masking map: " << maskingMap << "\n");
+  LDBG() << "Masking map: " << maskingMap;
 
   // Return the active mask for the masking map of this operation if it was
   // already created.
   auto activeMaskIt = activeMaskCache.find(maskingMap);
   if (activeMaskIt != activeMaskCache.end()) {
     Value mask = activeMaskIt->second;
-    LDBG("Reusing mask: " << mask << "\n");
+    LDBG() << "Reusing mask: " << mask;
     return mask;
   }
 
@@ -474,12 +468,10 @@ Value VectorizationState::getOrCreateMaskFor(
   auto maskType = getCanonicalVecType(rewriter.getI1Type(), maskingMap);
   auto maskShape = maskType.getShape();
 
-  LDBG("Mask shape: ");
-  LLVM_DEBUG(llvm::interleaveComma(maskShape, llvm::dbgs()));
-  LLVM_DEBUG(llvm::dbgs() << "\n");
+  LDBG() << "Mask shape: " << llvm::interleaved(maskShape);
 
   if (permutedStaticSizes == maskShape) {
-    LDBG("Masking is not needed for masking map: " << maskingMap << "\n");
+    LDBG() << "Masking is not needed for masking map: " << maskingMap;
     activeMaskCache[maskingMap] = Value();
     return Value();
   }
@@ -519,7 +511,7 @@ Operation *
 VectorizationState::maskOperation(RewriterBase &rewriter, Operation *opToMask,
                                   LinalgOp linalgOp,
                                   std::optional<AffineMap> maybeIndexingMap) {
-  LDBG("Trying to mask: " << *opToMask << "\n");
+  LDBG() << "Trying to mask: " << *opToMask;
 
   std::optional<AffineMap> maybeMaskingMap = std::nullopt;
   if (maybeIndexingMap)
@@ -530,7 +522,7 @@ VectorizationState::maskOperation(RewriterBase &rewriter, Operation *opToMask,
       getOrCreateMaskFor(rewriter, opToMask, linalgOp, maybeMaskingMap);
 
   if (!mask) {
-    LDBG("No mask required\n");
+    LDBG() << "No mask required";
     return opToMask;
   }
 
@@ -544,7 +536,7 @@ VectorizationState::maskOperation(RewriterBase &rewriter, Operation *opToMask,
     rewriter.replaceAllUsesExcept(resVal, maskOp.getResult(resIdx),
                                   maskOpTerminator);
 
-  LDBG("Masked operation: " << *maskOp << "\n");
+  LDBG() << "Masked operation: " << *maskOp;
   return maskOp;
 }
 
@@ -748,7 +740,7 @@ static Value buildVectorWrite(RewriterBase &rewriter, Value value,
     maskedWriteOp.setInBoundsAttr(rewriter.getBoolArrayAttr(inBounds));
   }
 
-  LDBG("vectorized op: " << *write << "\n");
+  LDBG() << "vectorized op: " << *write;
   if (!write->getResults().empty())
     return write->getResult(0);
   return Value();
@@ -1090,7 +1082,7 @@ getTensorExtractMemoryAccessPattern(tensor::ExtractOp extractOp,
   }
 
   if (!leadingIdxsLoopInvariant) {
-    LDBG("Found gather load: " << extractOp);
+    LDBG() << "Found gather load: " << extractOp;
     return VectorMemoryAccessKind::Gather;
   }
 
@@ -1104,7 +1096,7 @@ getTensorExtractMemoryAccessPattern(tensor::ExtractOp extractOp,
   // If the trailing index is loop invariant then this is a scalar load.
   if (leadingIdxsLoopInvariant &&
       isLoopInvariantIdx(linalgOp, extractOpTrailingIdx, resType)) {
-    LDBG("Found scalar broadcast load: " << extractOp);
+    LDBG() << "Found scalar broadcast load: " << extractOp;
 
     return VectorMemoryAccessKind::ScalarBroadcast;
   }
@@ -1122,12 +1114,12 @@ getTensorExtractMemoryAccessPattern(tensor::ExtractOp extractOp,
   isContiguousLoad &= (foundIndexOp && isRowVector);
 
   if (isContiguousLoad) {
-    LDBG("Found contigous load: " << extractOp);
+    LDBG() << "Found contigous load: " << extractOp;
     return VectorMemoryAccessKind::Contiguous;
   }
 
   // 4. Fallback case - gather load.
-  LDBG("Found gather load: " << extractOp);
+  LDBG() << "Found gather load: " << extractOp;
   return VectorMemoryAccessKind::Gather;
 }
 
@@ -1171,7 +1163,7 @@ vectorizeTensorExtract(RewriterBase &rewriter, VectorizationState &state,
         maskConstantOp, passThruConstantOp);
     gatherOp = state.maskOperation(rewriter, gatherOp, linalgOp);
 
-    LDBG("Vectorised as gather load: " << extractOp << "\n");
+    LDBG() << "Vectorised as gather load: " << extractOp;
     return VectorizationHookResult{VectorizationHookStatus::NewOp, gatherOp};
   }
 
@@ -1235,7 +1227,7 @@ vectorizeTensorExtract(RewriterBase &rewriter, VectorizationState &state,
     auto *maskedReadOp =
         mlir::vector::maskOperation(rewriter, transferReadOp, allTrue);
 
-    LDBG("Vectorised as scalar broadcast load: " << extractOp << "\n");
+    LDBG() << "Vectorised as scalar broadcast load: " << extractOp;
     return VectorizationHookResult{VectorizationHookStatus::NewOp,
                                    maskedReadOp};
   }
@@ -1262,7 +1254,7 @@ vectorizeTensorExtract(RewriterBase &rewriter, VectorizationState &state,
       rewriter, loc, resultType, extractOp.getTensor(), transferReadIdxs,
       /*padding=*/std::nullopt, permutationMap, inBounds);
 
-  LDBG("Vectorised as contiguous load: " << extractOp);
+  LDBG() << "Vectorised as contiguous load: " << extractOp;
   return VectorizationHookResult{VectorizationHookStatus::NewOp,
                                  transferReadOp};
 }
@@ -1310,7 +1302,7 @@ static VectorizationHookResult
 vectorizeOneOp(RewriterBase &rewriter, VectorizationState &state,
                LinalgOp linalgOp, Operation *op, const IRMapping &bvm,
                ArrayRef<CustomVectorizationHook> customVectorizationHooks) {
-  LDBG("vectorize op " << *op << "\n");
+  LDBG() << "vectorize op " << *op;
 
   // 1. Try to apply any CustomVectorizationHook.
   if (!customVectorizationHooks.empty()) {
@@ -1425,7 +1417,7 @@ static LogicalResult
 vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state,
                          LinalgOp linalgOp,
                          SmallVectorImpl<Value> &newResults) {
-  LDBG("Vectorizing operation as linalg generic\n");
+  LDBG() << "Vectorizing operation as linalg generic";
   Block *block = linalgOp.getBlock();
 
   // 2. Values defined above the region can only be broadcast for now. Make them
@@ -1490,8 +1482,8 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state,
       readValue = vector::ExtractOp::create(rewriter, loc, readValue,
                                             ArrayRef<int64_t>());
 
-    LDBG("New vectorized bbarg(" << bbarg.getArgNumber() << "): " << readValue
-                                 << "\n");
+    LDBG() << "New vectorized bbarg(" << bbarg.getArgNumber()
+           << "): " << readValue;
     bvm.map(bbarg, readValue);
     bvm.map(opOperand->get(), readValue);
   }
@@ -1523,13 +1515,13 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state,
     VectorizationHookResult result =
         vectorizeOneOp(rewriter, state, linalgOp, &op, bvm, hooks);
     if (result.status == VectorizationHookStatus::Failure) {
-      LDBG("failed to vectorize: " << op << "\n");
+      LDBG() << "failed to vectorize: " << op;
       return failure();
     }
     if (result.status == VectorizationHookStatus::NewOp) {
       Operation *maybeMaskedOp =
           state.maskOperation(rewriter, result.newOp, linalgOp);
-      LDBG("New vector op: " << *maybeMaskedOp << "\n");
+      LDBG() << "New vector op: " << *maybeMaskedOp;
       bvm.map(op.getResults(), maybeMaskedOp->getResults());
     }
   }
@@ -1925,7 +1917,7 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
       cast<ReifyRankedShapedTypeOpInterface>(unpackOp.getOperation())
           .reifyResultShapes(rewriter, reifiedRetShapes);
   if (status.failed()) {
-    LDBG("Unable to reify result shapes of " << unpackOp);
+    LDBG() << "Unable to reify result shapes of " << unpackOp;
     return failure();
   }
   Location loc = unpackOp->getLoc();
@@ -2010,7 +2002,7 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
 // ops that may not commute (e.g. linear reduction + non-linear instructions).
 static LogicalResult reductionPreconditions(LinalgOp op) {
   if (llvm::none_of(op.getIteratorTypesArray(), isReductionIterator)) {
-    LDBG("reduction precondition failed: no reduction iterator\n");
+    LDBG() << "reduction precondition failed: no reduction iterator";
     return failure();
   }
   for (OpOperand &opOperand : op.getDpsInitsMutable()) {
@@ -2020,7 +2012,7 @@ static LogicalResult reductionPreconditions(LinalgOp op) {
 
     Operation *reduceOp = matchLinalgReduction(&opOperand);
     if (!reduceOp || !getCombinerOpKind(reduceOp)) {
-      LDBG("reduction precondition failed: reduction detection failed\n");
+      LDBG() << "reduction precondition failed: reduction detection failed";
       return failure();
     }
   }
@@ -2031,13 +2023,13 @@ static LogicalResult
 vectorizeDynamicConvOpPrecondition(linalg::LinalgOp conv,
                                    bool flatten1DDepthwiseConv) {
   if (flatten1DDepthwiseConv) {
-    LDBG("Vectorization of flattened convs with dynamic shapes is not "
-         "supported\n");
+    LDBG() << "Vectorization of flattened convs with dynamic shapes is not "
+              "supported";
     return failure();
   }
 
   if (!isa<linalg::DepthwiseConv1DNwcWcOp>(conv)) {
-    LDBG("Not a 1D depth-wise WC conv, dynamic shapes are not supported\n");
+    LDBG() << "Not a 1D depth-wise WC conv, dynamic shapes are not supported";
     return failure();
   }
 
@@ -2047,8 +2039,8 @@ vectorizeDynamicConvOpPrecondition(linalg::LinalgOp conv,
   ArrayRef<int64_t> lhsShape = cast<ShapedType>(lhs.getType()).getShape();
   auto shapeWithoutCh = lhsShape.drop_back(1);
   if (ShapedType::isDynamicShape(shapeWithoutCh)) {
-    LDBG("Dynamically-shaped op vectorization precondition failed: only "
-         "channel dim can be dynamic\n");
+    LDBG() << "Dynamically-shaped op vectorization precondition failed: only "
+              "channel dim can be dynamic";
     return failure();
   }
 
@@ -2071,7 +2063,7 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op,
           op.getOperation()))
     return failure();
 
-  LDBG("Dynamically-shaped op meets vectorization pre-conditions\n");
+  LDBG() << "Dynamically-shaped op meets vectorization pre-conditions";
   return success();
 }
 
@@ -2083,7 +2075,7 @@ vectorizeUnPackOpPrecondition(linalg::UnPackOp unpackOp,
   if (llvm::any_of(unpackOp.getInnerTiles(), [](OpFoldResult res) {
         return !getConstantIntValue(res).has_value();
       })) {
-    LDBG("Inner-tiles must be constant: " << unpackOp << "\n");
+    LDBG() << "Inner-tiles must be constant: " << unpackOp;
     return failure();
   }
   ArrayRef<int64_t> resultShape = unpackOp.getDestType().getShape();
@@ -2123,7 +2115,7 @@ vectorizeInsertSliceOpPrecondition(tensor::InsertSliceOp sliceOp,
       !sourceType.hasStaticShape() && inputVectorSizes.empty();
 
   if (!padValue && isOutOfBoundsRead) {
-    LDBG("Failed to get a pad value for out-of-bounds read access\n");
+    LDBG() << "Failed to get a pad value for out-of-bounds read access";
     return failure();
   }
   return success();
@@ -2355,7 +2347,7 @@ static LogicalResult vectorizeLinalgOpPrecondition(
 
   if (linalgOp.hasDynamicShape() && failed(vectorizeDynamicLinalgOpPrecondition(
                                         linalgOp, flatten1DDepthwiseConv))) {
-    LDBG("Dynamically-shaped op failed vectorization pre-conditions\n");
+    LDBG() << "Dynamically-shaped op failed vectorization pre-conditions";
     return failure();
   }
 
@@ -2397,11 +2389,11 @@ static LogicalResult vectorizeLinalgOpPrecondition(
   // all indexing maps are projected permutations. For convs and stencils the
   // logic will need to evolve.
   if (!allIndexingsAreProjectedPermutation(linalgOp)) {
-    LDBG("precondition failed: not projected permutations\n");
+    LDBG() << "precondition failed: not projected permutations";
     return failure();
   }
   if (failed(reductionPreconditions(linalgOp))) {
-    LDBG("precondition failed: reduction preconditions\n");
+    LDBG() << "precondition failed: reduction preconditions";
     return failure();
   }
   return success();
@@ -2413,7 +2405,7 @@ vectorizePackOpPrecondition(linalg::PackOp packOp,
   auto padValue = packOp.getPaddingValue();
   Attribute cstAttr;
   if (padValue && !matchPattern(padValue, m_Constant(&cstAttr))) {
-    LDBG("pad value is not constant: " << packOp << "\n");
+    LDBG() << "pad value is not constant: " << packOp;
     return failure();
   }
   ArrayRef<int64_t> resultTensorShape = packOp.getDestType().getShape();
@@ -2433,7 +2425,7 @@ vectorizePackOpPrecondition(linalg::PackOp packOp,
   if (llvm::any_of(packOp.getInnerTiles(), [](OpFoldResult v) {
         return !getConstantIntValue(v).has_value();
       })) {
-    LDBG("inner_tiles must be constant: " << packOp << "\n");
+    LDBG() << "inner_tiles must be constant: " << packOp;
     return failure();
   }
 
@@ -2445,7 +2437,7 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                            ArrayRef<int64_t> inputVectorSizes) {
   auto padValue = padOp.getConstantPaddingValue();
   if (!padValue) {
-    LDBG("pad value is not constant: " << padOp << "\n");
+    LDBG() << "pad value is not constant: " << padOp;
     return failure();
   }
 
@@ -2472,7 +2464,7 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
         return (!pad.has_value() || pad.value() != 0) &&
                resultTensorShape[pos] != 1;
       })) {
-    LDBG("low pad must all be zero for all non unit dims: " << padOp << "\n");
+    LDBG() << "low pad must all be zero for all non unit dims: " << padOp;
     return failure();
   }
 
@@ -2541,13 +2533,14 @@ vectorizeScalableVectorPrecondition(Operation *op,
   case utils::IteratorType::reduction: {
     // Check 3. above is met.
     if (iterators.size() != inputVectorSizes.size()) {
-      LDBG("Non-trailing reduction dim requested for scalable "
-           "vectorization\n");
+      LDBG() << "Non-trailing reduction dim requested for scalable "
+                "vectorization";
       return failure();
     }
     if (isa<linalg::MatmulOp>(op) || isa<linalg::MatmulTransposeAOp>(op)) {
-      LDBG("Scalable vectorization of the reduction dim in Matmul-like ops "
-           "is not supported\n");
+      LDBG()
+          << "Scalable vectorization of the reduction dim in Matmul-like ops "
+             "is not supported";
       return failure();
     }
     break;
@@ -2555,8 +2548,8 @@ vectorizeScalableVectorPrecondition(Operation *op,
   case utils::IteratorType::parallel: {
     // Check 1. and 2. above are met.
     if (seenNonUnitParallel) {
-      LDBG("Inner parallel dim not requested for scalable "
-           "vectorization\n");
+      LDBG() << "Inner parallel dim not requested for scalable "
+                "vectorization";
       return failure();
     }
     break;
@@ -2572,8 +2565,9 @@ vectorizeScalableVectorPrecondition(Operation *op,
     //    * iterators = [..., parallel, reduction]
     //    * scalable flags = [..., true, true]
     if (iterators.back() == utils::IteratorType::reduction) {
-      LDBG("Higher dim than the trailing reduction dim requested for scalable "
-           "vectorization\n");
+      LDBG() << "Higher dim than the trailing reduction dim requested for "
+                "scalable "
+                "vectorization";
       return failure();
     }
     scalableFlags.pop_back();
@@ -2739,7 +2733,7 @@ FailureOr<VectorizationResult> mlir::linalg::vectorize(
           .Default([](auto) { return failure(); });
 
   if (failed(vectorizeResult)) {
-    LDBG("Vectorization failed\n");
+    LDBG() << "Vectorization failed";
     return failure();
   }
 
@@ -3244,8 +3238,8 @@ static bool mayExistInterleavedUses(Operation *firstOp, Operation *secondOp,
                                     ValueRange values) {
   if (firstOp->getBlock() != secondOp->getBlock() ||
       !firstOp->isBeforeInBlock(secondOp)) {
-    LDBG("interleavedUses precondition failed, firstOp: "
-         << *firstOp << ", second op: " << *secondOp << "\n");
+    LDBG() << "interleavedUses precondition failed, firstOp: " << *firstOp
+           << ", second op: " << *secondOp;
     return true;
   }
   for (auto v : values) {
@@ -3257,8 +3251,8 @@ static bool mayExistInterleavedUses(Operation *firstOp, Operation *secondOp,
       if (owner->getBlock() == firstOp->getBlock() &&
           (owner->isBeforeInBlock(firstOp) || secondOp->isBeforeInBlock(owner)))
         continue;
-      LDBG(" found interleaved op " << *owner << ", firstOp: " << *firstOp
-                                    << ", second op: " << *secondOp << "\n");
+      LDBG() << " found interleaved op " << *owner << ", firstOp: " << *firstOp
+             << ", second op: " << *secondOp;
       return true;
     }
   }
diff --git a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
index f5f0bfa4128aa..bc3e8b2b17fb1 100644
--- a/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/NVGPU/TransformOps/NVGPUTransformOps.cpp
@@ -38,9 +38,6 @@ using namespace mlir::NVVM;
 using namespace mlir::transform;
 
 #define DEBUG_TYPE "nvgpu-transforms"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define DBGSNL() (llvm::dbgs() << "\n")
-#define LDBG(X) LLVM_DEBUG(DBGS() << (X) << "\n")
 
 //===----------------------------------------------------------------------===//
 // Apply...ConversionPatternsOp
diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
index 59828563a8196..1130538e51fb7 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp
@@ -19,12 +19,10 @@
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/MathExtras.h"
 
 #define DEBUG_TYPE "scf-loop-pipelining"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 using namespace mlir;
 using namespace mlir::scf;
@@ -100,7 +98,7 @@ struct LoopPipelinerInternal {
 
 bool LoopPipelinerInternal::initializeLoopInfo(
     ForOp op, const PipeliningOption &options) {
-  LDBG("Start initializeLoopInfo");
+  LDBG() << "Start initializeLoopInfo";
   forOp = op;
   ub = forOp.getUpperBound();
   lb = forOp.getLowerBound();
@@ -109,7 +107,7 @@ bool LoopPipelinerInternal::initializeLoopInfo(
   std::vector<std::pair<Operation *, unsigned>> schedule;
   options.getScheduleFn(forOp, schedule);
   if (schedule.empty()) {
-    LDBG("--empty schedule -> BAIL");
+    LDBG() << "--empty schedule -> BAIL";
     return false;
   }
 
@@ -126,7 +124,7 @@ bool LoopPipelinerInternal::initializeLoopInfo(
   auto stepCst = getConstantIntValue(step);
   if (!upperBoundCst || !lowerBoundCst || !stepCst) {
     if (!options.supportDynamicLoops) {
-      LDBG("--dynamic loop not supported -> BAIL");
+      LDBG() << "--dynamic loop not supported -> BAIL";
       return false;
     }
   } else {
@@ -134,21 +132,21 @@ bool LoopPipelinerInternal::initializeLoopInfo(
     int64_t lbImm = lowerBoundCst.value();
     int64_t stepImm = stepCst.value();
     if (stepImm <= 0) {
-      LDBG("--invalid loop step -> BAIL");
+      LDBG() << "--invalid loop step -> BAIL";
       return false;
     }
     int64_t numIteration = llvm::divideCeilSigned(ubImm - lbImm, stepImm);
     if (numIteration >= maxStage) {
       dynamicLoop = false;
     } else if (!options.supportDynamicLoops) {
-      LDBG("--fewer loop iterations than pipeline stages -> BAIL");
+      LDBG() << "--fewer loop iterations than pipeline stages -> BAIL";
       return false;
     }
   }
   peelEpilogue = options.peelEpilogue;
   predicateFn = options.predicateFn;
   if ((!peelEpilogue || dynamicLoop) && predicateFn == nullptr) {
-    LDBG("--no epilogue or predicate set -> BAIL");
+    LDBG() << "--no epilogue or predicate set -> BAIL";
     return false;
   }
 
@@ -156,13 +154,13 @@ bool LoopPipelinerInternal::initializeLoopInfo(
   for (Operation &op : forOp.getBody()->without_terminator()) {
     if (!stages.contains(&op)) {
       op.emitOpError("not assigned a pipeline stage");
-      LDBG("--op not assigned a pipeline stage: " << op << " -> BAIL");
+      LDBG() << "--op not assigned a pipeline stage: " << op << " -> BAIL";
       return false;
     }
   }
 
   if (!verifySchedule()) {
-    LDBG("--invalid schedule: " << op << " -> BAIL");
+    LDBG() << "--invalid schedule: " << op << " -> BAIL";
     return false;
   }
 
@@ -173,15 +171,16 @@ bool LoopPipelinerInternal::initializeLoopInfo(
     (void)stageNum;
     if (op == forOp.getBody()->getTerminator()) {
       op->emitError("terminator should not be assigned a stage");
-      LDBG("--terminator should not be assigned stage: " << *op << " -> BAIL");
+      LDBG() << "--terminator should not be assigned stage: " << *op
+             << " -> BAIL";
       return false;
     }
     if (op->getBlock() != forOp.getBody()) {
       op->emitOpError("the owning Block of all operations assigned a stage "
                       "should be the loop body block");
-      LDBG("--the owning Block of all operations assigned a stage "
-           "should be the loop body block: "
-           << *op << " -> BAIL");
+      LDBG() << "--the owning Block of all operations assigned a stage "
+                "should be the loop body block: "
+             << *op << " -> BAIL";
       return false;
     }
   }
@@ -196,8 +195,8 @@ bool LoopPipelinerInternal::initializeLoopInfo(
                      return !def ||
                             (!stages.contains(def) && forOp->isAncestor(def));
                    })) {
-    LDBG("--only support loop carried dependency with a distance of 1 or "
-         "defined outside of the loop -> BAIL");
+    LDBG() << "--only support loop carried dependency with a distance of 1 or "
+              "defined outside of the loop -> BAIL";
     return false;
   }
   annotateFn = options.annotateFn;
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index 4025ec68ed134..e2e2f516bc9f0 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -525,13 +525,13 @@ LogicalResult mlir::loopUnrollJamByFactor(scf::ForOp forOp,
   // If any control operand of any inner loop of `forOp` is defined within
   // `forOp`, no unroll jam.
   if (!areInnerBoundsInvariant(forOp)) {
-    LDBG("failed to unroll and jam: inner bounds are not invariant");
+    LDBG() << "failed to unroll and jam: inner bounds are not invariant";
     return failure();
   }
 
   // Currently, for operations with results are not supported.
   if (forOp->getNumResults() > 0) {
-    LDBG("failed to unroll and jam: unsupported loop with results");
+    LDBG() << "failed to unroll and jam: unsupported loop with results";
     return failure();
   }
 
@@ -540,16 +540,17 @@ LogicalResult mlir::loopUnrollJamByFactor(scf::ForOp forOp,
   std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
   if (!tripCount.has_value()) {
     // If the trip count is dynamic, do not unroll & jam.
-    LDBG("failed to unroll and jam: trip count could not be determined");
+    LDBG() << "failed to unroll and jam: trip count could not be determined";
     return failure();
   }
   if (unrollJamFactor > *tripCount) {
-    LDBG("unroll and jam factor is greater than trip count, set factor to trip "
-         "count");
+    LDBG() << "unroll and jam factor is greater than trip count, set factor to "
+              "trip "
+              "count";
     unrollJamFactor = *tripCount;
   } else if (*tripCount % unrollJamFactor != 0) {
-    LDBG("failed to unroll and jam: unsupported trip count that is not a "
-         "multiple of unroll jam factor");
+    LDBG() << "failed to unroll and jam: unsupported trip count that is not a "
+              "multiple of unroll jam factor";
     return failure();
   }
 
diff --git a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp
index 46628364231d5..452f23516edb9 100644
--- a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp
+++ b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InterleavedRange.h"
 
@@ -23,8 +24,8 @@
 #define DEBUG_TYPE_FULL "transform-dialect-full"
 #define DEBUG_PRINT_AFTER_ALL "transform-dialect-print-top-level-after-all"
 #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << (X))
-#define FULL_LDBG(X) DEBUG_WITH_TYPE(DEBUG_TYPE_FULL, (DBGS() << (X)))
+#define FULL_LDBG()                                                            \
+  DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), DEBUG_TYPE_FULL)
 
 using namespace mlir;
 
@@ -486,24 +487,20 @@ void transform::TransformState::recordOpHandleInvalidationOne(
       newlyInvalidated.count(otherHandle))
     return;
 
-  FULL_LDBG("--recordOpHandleInvalidationOne\n");
-  DEBUG_WITH_TYPE(DEBUG_TYPE_FULL, {
-    (DBGS() << "--ancestors: "
-            << llvm::interleaved(llvm::make_pointee_range(potentialAncestors))
-            << "\n");
-  });
+  FULL_LDBG() << "--recordOpHandleInvalidationOne";
+  FULL_LDBG() << "--ancestors: "
+              << llvm::interleaved(
+                     llvm::make_pointee_range(potentialAncestors));
 
   Operation *owner = consumingHandle.getOwner();
   unsigned operandNo = consumingHandle.getOperandNumber();
   for (Operation *ancestor : potentialAncestors) {
     // clang-format off
-    DEBUG_WITH_TYPE(DEBUG_TYPE_FULL,
-      { (DBGS() << "----handle one ancestor: " << *ancestor << "\n"); });
-    DEBUG_WITH_TYPE(DEBUG_TYPE_FULL,
-      { (DBGS() << "----of payload with name: "
-                << payloadOp->getName().getIdentifier() << "\n"); });
-    DEBUG_WITH_TYPE(DEBUG_TYPE_FULL,
-      { (DBGS() << "----of payload: " << *payloadOp << "\n"); });
+    FULL_LDBG() << "----handle one ancestor: " << *ancestor;
+
+    FULL_LDBG() << "----of payload with name: "
+                << payloadOp->getName().getIdentifier();
+    FULL_LDBG() << "----of payload: " << *payloadOp;
     // clang-format on
     if (!ancestor->isAncestor(payloadOp))
       continue;
@@ -609,10 +606,8 @@ void transform::TransformState::recordOpHandleInvalidation(
     transform::TransformState::InvalidatedHandleMap &newlyInvalidated) const {
 
   if (potentialAncestors.empty()) {
-    DEBUG_WITH_TYPE(DEBUG_TYPE_FULL, {
-      (DBGS() << "----recording invalidation for empty handle: " << handle.get()
-              << "\n");
-    });
+    FULL_LDBG() << "----recording invalidation for empty handle: "
+                << handle.get();
 
     Operation *owner = handle.getOwner();
     unsigned operandNo = handle.getOperandNumber();
@@ -709,7 +704,7 @@ void transform::TransformState::recordValueHandleInvalidation(
 LogicalResult transform::TransformState::checkAndRecordHandleInvalidationImpl(
     transform::TransformOpInterface transform,
     transform::TransformState::InvalidatedHandleMap &newlyInvalidated) const {
-  FULL_LDBG("--Start checkAndRecordHandleInvalidation\n");
+  FULL_LDBG() << "--Start checkAndRecordHandleInvalidation";
   auto memoryEffectsIface =
       cast<MemoryEffectOpInterface>(transform.getOperation());
   SmallVector<MemoryEffects::EffectInstance> effects;
@@ -717,9 +712,7 @@ LogicalResult transform::TransformState::checkAndRecordHandleInvalidationImpl(
       transform::TransformMappingResource::get(), effects);
 
   for (OpOperand &target : transform->getOpOperands()) {
-    DEBUG_WITH_TYPE(DEBUG_TYPE_FULL, {
-      (DBGS() << "----iterate on handle: " << target.get() << "\n");
-    });
+    FULL_LDBG() << "----iterate on handle: " << target.get();
     // If the operand uses an invalidated handle, report it. If the operation
     // allows handles to point to repeated payload operations, only report
     // pre-existing invalidation errors. Otherwise, also report invalidations
@@ -727,14 +720,14 @@ LogicalResult transform::TransformState::checkAndRecordHandleInvalidationImpl(
     auto it = invalidatedHandles.find(target.get());
     auto nit = newlyInvalidated.find(target.get());
     if (it != invalidatedHandles.end()) {
-      FULL_LDBG("--End checkAndRecordHandleInvalidation, found already "
-                "invalidated -> FAILURE\n");
+      FULL_LDBG() << "--End checkAndRecordHandleInvalidation, found already "
+                     "invalidated -> FAILURE";
       return it->getSecond()(transform->getLoc()), failure();
     }
     if (!transform.allowsRepeatedHandleOperands() &&
         nit != newlyInvalidated.end()) {
-      FULL_LDBG("--End checkAndRecordHandleInvalidation, found newly "
-                "invalidated (by this op) -> FAILURE\n");
+      FULL_LDBG() << "--End checkAndRecordHandleInvalidation, found newly "
+                     "invalidated (by this op) -> FAILURE";
       return nit->getSecond()(transform->getLoc()), failure();
     }
 
@@ -745,27 +738,28 @@ LogicalResult transform::TransformState::checkAndRecordHandleInvalidationImpl(
              effect.getValue() == target.get();
     };
     if (llvm::any_of(effects, consumesTarget)) {
-      FULL_LDBG("----found consume effect\n");
+      FULL_LDBG() << "----found consume effect";
       if (llvm::isa<transform::TransformHandleTypeInterface>(
               target.get().getType())) {
-        FULL_LDBG("----recordOpHandleInvalidation\n");
+        FULL_LDBG() << "----recordOpHandleInvalidation";
         SmallVector<Operation *> payloadOps =
             llvm::to_vector(getPayloadOps(target.get()));
         recordOpHandleInvalidation(target, payloadOps, nullptr,
                                    newlyInvalidated);
       } else if (llvm::isa<transform::TransformValueHandleTypeInterface>(
                      target.get().getType())) {
-        FULL_LDBG("----recordValueHandleInvalidation\n");
+        FULL_LDBG() << "----recordValueHandleInvalidation";
         recordValueHandleInvalidation(target, newlyInvalidated);
       } else {
-        FULL_LDBG("----not a TransformHandle -> SKIP AND DROP ON THE FLOOR\n");
+        FULL_LDBG()
+            << "----not a TransformHandle -> SKIP AND DROP ON THE FLOOR";
       }
     } else {
-      FULL_LDBG("----no consume effect -> SKIP\n");
+      FULL_LDBG() << "----no consume effect -> SKIP";
     }
   }
 
-  FULL_LDBG("--End checkAndRecordHandleInvalidation -> SUCCESS\n");
+  FULL_LDBG() << "--End checkAndRecordHandleInvalidation -> SUCCESS";
   return success();
 }
 
@@ -823,9 +817,7 @@ transform::TransformState::applyTransform(TransformOpInterface transform) {
     transform->print(llvm::dbgs(), OpPrintingFlags().skipRegions());
     llvm::dbgs() << "\n";
   });
-  DEBUG_WITH_TYPE(DEBUG_TYPE_FULL,
-                  DBGS() << "Top-level payload before application:\n"
-                         << *getTopLevel() << "\n");
+  FULL_LDBG() << "Top-level payload before application:\n" << *getTopLevel();
   auto printOnFailureRAII = llvm::make_scope_exit([this] {
     (void)this;
     LLVM_DEBUG(DBGS() << "Failing Top-level payload:\n"; getTopLevel()->print(
@@ -837,47 +829,45 @@ transform::TransformState::applyTransform(TransformOpInterface transform) {
 
   // Expensive checks to detect invalid transform IR.
   if (options.getExpensiveChecksEnabled()) {
-    FULL_LDBG("ExpensiveChecksEnabled\n");
+    FULL_LDBG() << "ExpensiveChecksEnabled";
     if (failed(checkAndRecordHandleInvalidation(transform)))
       return DiagnosedSilenceableFailure::definiteFailure();
 
     for (OpOperand &operand : transform->getOpOperands()) {
-      DEBUG_WITH_TYPE(DEBUG_TYPE_FULL, {
-        (DBGS() << "iterate on handle: " << operand.get() << "\n");
-      });
+      FULL_LDBG() << "iterate on handle: " << operand.get();
       if (!isHandleConsumed(operand.get(), transform)) {
-        FULL_LDBG("--handle not consumed -> SKIP\n");
+        FULL_LDBG() << "--handle not consumed -> SKIP";
         continue;
       }
       if (transform.allowsRepeatedHandleOperands()) {
-        FULL_LDBG("--op allows repeated handles -> SKIP\n");
+        FULL_LDBG() << "--op allows repeated handles -> SKIP";
         continue;
       }
-      FULL_LDBG("--handle is consumed\n");
+      FULL_LDBG() << "--handle is consumed";
 
       Type operandType = operand.get().getType();
       if (llvm::isa<TransformHandleTypeInterface>(operandType)) {
-        FULL_LDBG("--checkRepeatedConsumptionInOperand for Operation*\n");
+        FULL_LDBG() << "--checkRepeatedConsumptionInOperand for Operation*";
         DiagnosedSilenceableFailure check =
             checkRepeatedConsumptionInOperand<Operation *>(
                 getPayloadOpsView(operand.get()), transform,
                 operand.getOperandNumber());
         if (!check.succeeded()) {
-          FULL_LDBG("----FAILED\n");
+          FULL_LDBG() << "----FAILED";
           return check;
         }
       } else if (llvm::isa<TransformValueHandleTypeInterface>(operandType)) {
-        FULL_LDBG("--checkRepeatedConsumptionInOperand For Value\n");
+        FULL_LDBG() << "--checkRepeatedConsumptionInOperand For Value";
         DiagnosedSilenceableFailure check =
             checkRepeatedConsumptionInOperand<Value>(
                 getPayloadValuesView(operand.get()), transform,
                 operand.getOperandNumber());
         if (!check.succeeded()) {
-          FULL_LDBG("----FAILED\n");
+          FULL_LDBG() << "----FAILED";
           return check;
         }
       } else {
-        FULL_LDBG("--not a TransformHandle -> SKIP AND DROP ON THE FLOOR\n");
+        FULL_LDBG() << "--not a TransformHandle -> SKIP AND DROP ON THE FLOOR";
       }
     }
   }
@@ -1277,7 +1267,7 @@ void transform::TrackingListener::notifyMatchFailure(
   LLVM_DEBUG({
     Diagnostic diag(loc, DiagnosticSeverity::Remark);
     reasonCallback(diag);
-    DBGS() << "Match Failure : " << diag.str() << "\n";
+    DBGS() << "Match Failure : " << diag.str() << "\n";
   });
 }
 
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
index e6bb96f453fbc..91a484f7d463c 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
@@ -32,7 +32,7 @@
 #include "mlir/IR/Value.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
@@ -41,9 +41,6 @@
 using namespace mlir;
 
 #define DEBUG_TYPE "vector-narrow-type-emulation"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define DBGSNL() (llvm::dbgs() << "\n")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 using VectorValue = TypedValue<VectorType>;
 using MemRefValue = TypedValue<MemRefType>;
@@ -1526,11 +1523,11 @@ BitCastBitsEnumerator::BitCastBitsEnumerator(VectorType sourceVectorType,
          "requires -D non-scalable vector type");
   int64_t sourceBitWidth = sourceVectorType.getElementTypeBitWidth();
   int64_t mostMinorSourceDim = sourceVectorType.getShape().back();
-  LDBG("sourceVectorType: " << sourceVectorType);
+  LDBG() << "sourceVectorType: " << sourceVectorType;
 
   int64_t targetBitWidth = targetVectorType.getElementTypeBitWidth();
   int64_t mostMinorTargetDim = targetVectorType.getShape().back();
-  LDBG("targetVectorType: " << targetVectorType);
+  LDBG() << "targetVectorType: " << targetVectorType;
 
   int64_t bitwidth = targetBitWidth * mostMinorTargetDim;
   (void)mostMinorSourceDim;
@@ -1555,7 +1552,7 @@ BitCastBitsEnumerator::BitCastBitsEnumerator(VectorType sourceVectorType,
 BitCastRewriter::BitCastRewriter(VectorType sourceVectorType,
                                  VectorType targetVectorType)
     : enumerator(BitCastBitsEnumerator(sourceVectorType, targetVectorType)) {
-  LDBG("\n" << enumerator.sourceElementRanges);
+  LDBG() << "\n" << enumerator.sourceElementRanges;
 }
 
 /// Verify that the precondition type meets the common preconditions for any
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
index 062c51f6a5071..501abecfacd04 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
@@ -16,13 +16,11 @@
 #include "mlir/Interfaces/VectorInterfaces.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/InterleavedRange.h"
 #include <optional>
 
 #define DEBUG_TYPE "vector-unroll"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 using namespace mlir;
 using namespace mlir::vector;
@@ -90,10 +88,9 @@ static Operation *cloneOpWithOperandsAndTypes(OpBuilder &builder, Location loc,
 /// std::nullopt if the op shouldn't be or cannot be unrolled.
 static std::optional<SmallVector<int64_t>>
 getTargetShape(const vector::UnrollVectorOptions &options, Operation *op) {
-  LDBG("");
-  LDBG("Get unroll shape for op " << op->getName().getStringRef());
+  LDBG() << "Get unroll shape for op " << op->getName().getStringRef();
   if (options.filterConstraint && failed(options.filterConstraint(op))) {
-    LDBG("--no filter constraint -> BAIL");
+    LDBG() << "--no filter constraint -> BAIL";
     return std::nullopt;
   }
   assert(options.nativeShape &&
@@ -101,33 +98,33 @@ getTargetShape(const vector::UnrollVectorOptions &options, Operation *op) {
          "shape call back function to be set");
   auto unrollableVectorOp = dyn_cast<VectorUnrollOpInterface>(op);
   if (!unrollableVectorOp) {
-    LDBG("--not an unrollable op -> BAIL");
+    LDBG() << "--not an unrollable op -> BAIL";
     return std::nullopt;
   }
   auto maybeUnrollShape = unrollableVectorOp.getShapeForUnroll();
   if (!maybeUnrollShape) {
-    LDBG("--could not get shape of op " << *op << " -> BAIL");
+    LDBG() << "--could not get shape of op " << *op << " -> BAIL";
     return std::nullopt;
   }
-  LDBG("--vector op shape: " << llvm::interleaved(*maybeUnrollShape));
+  LDBG() << "--vector op shape: " << llvm::interleaved(*maybeUnrollShape);
 
   std::optional<SmallVector<int64_t>> targetShape = options.nativeShape(op);
   if (!targetShape) {
-    LDBG("--no unrolling target shape defined " << *op << "-> SKIP");
+    LDBG() << "--no unrolling target shape defined " << *op << "-> SKIP";
     return std::nullopt;
   }
-  LDBG("--target shape: " << llvm::interleaved(*targetShape));
+  LDBG() << "--target shape: " << llvm::interleaved(*targetShape);
 
   auto maybeShapeRatio = computeShapeRatio(*maybeUnrollShape, *targetShape);
   if (!maybeShapeRatio) {
-    LDBG("--could not compute integral shape ratio -> BAIL");
+    LDBG() << "--could not compute integral shape ratio -> BAIL";
     return std::nullopt;
   }
   if (llvm::all_of(*maybeShapeRatio, [](int64_t v) { return v == 1; })) {
-    LDBG("--no unrolling needed -> SKIP");
+    LDBG() << "--no unrolling needed -> SKIP";
     return std::nullopt;
   }
-  LDBG("--found an integral shape ratio to unroll to -> SUCCESS");
+  LDBG() << "--found an integral shape ratio to unroll to -> SUCCESS";
   return targetShape;
 }
 
diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index c045063e8194f..10ed2bcfb35a3 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -27,13 +27,11 @@
 #include "mlir/Support/LLVM.h"
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/InterleavedRange.h"
 
 #define DEBUG_TYPE "vector-utils"
 
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
-
 using namespace mlir;
 
 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
@@ -369,14 +367,14 @@ Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
 LogicalResult
 vector::isValidMaskedInputVector(ArrayRef<int64_t> shape,
                                  ArrayRef<int64_t> inputVectorSizes) {
-  LDBG("Iteration space static sizes:" << llvm::interleaved(shape));
+  LDBG() << "Iteration space static sizes:" << llvm::interleaved(shape);
 
   if (inputVectorSizes.size() != shape.size()) {
-    LDBG("Input vector sizes don't match the number of loops");
+    LDBG() << "Input vector sizes don't match the number of loops";
     return failure();
   }
   if (ShapedType::isDynamicShape(inputVectorSizes)) {
-    LDBG("Input vector sizes can't have dynamic dimensions");
+    LDBG() << "Input vector sizes can't have dynamic dimensions";
     return failure();
   }
   if (!llvm::all_of(llvm::zip(shape, inputVectorSizes),
@@ -386,8 +384,9 @@ vector::isValidMaskedInputVector(ArrayRef<int64_t> shape,
                       return ShapedType::isDynamic(staticSize) ||
                              staticSize <= inputSize;
                     })) {
-    LDBG("Input vector sizes must be greater than or equal to iteration space "
-         "static sizes");
+    LDBG() << "Input vector sizes must be greater than or equal to iteration "
+              "space "
+              "static sizes";
     return failure();
   }
   return success();
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 4656f112958b8..d82c541f31359 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/DebugLog.h"
 
 namespace mlir {
 namespace xegpu {
@@ -26,8 +27,6 @@ namespace xegpu {
 } // namespace mlir
 
 #define DEBUG_TYPE "xegpu-blocking"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 using namespace mlir;
 
@@ -53,7 +52,7 @@ resolveUnrealizedConversionCastOp(UnrealizedConversionCastOp castOp) {
   // We only interest in the case where all inputs and outputs have the
   // identical VectorTypes
   if (!hasIdenticalVectorTypes(inputs) || !hasIdenticalVectorTypes(outputs)) {
-    LDBG("skip unrealized conversion cast op not emulating pack/unpack.");
+    LDBG() << "skip unrealized conversion cast op not emulating pack/unpack.";
     return;
   }
 
@@ -149,7 +148,7 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
     if (auto type = dyn_cast<ShapedType>(value.getType()))
       return llvm::to_vector(type.getShape());
   }
-  LDBG("failed to getTileShape for: " << value);
+  LDBG() << "failed to getTileShape for: " << value;
   return std::nullopt;
 }
 
@@ -214,7 +213,7 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
         return layout && layout.isWgLayout();
       });
   if (hasWgLayoutOperands || hasWgLayoutResults) {
-    LDBG("skip unrolling for op with workgroup level layout: " << *op);
+    LDBG() << "skip unrolling for op with workgroup level layout: " << *op;
     return false;
   }
 
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
index a6208b455aa35..ec8fad484ed3e 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -17,7 +17,7 @@
 #include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
 #include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 
 namespace mlir {
 namespace xegpu {
@@ -27,8 +27,6 @@ namespace xegpu {
 } // namespace mlir
 
 #define DEBUG_TYPE "xegpu-unroll"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 using namespace mlir;
 
@@ -44,11 +42,10 @@ struct UnrollPattern : public OpRewritePattern<SourceOp> {
   /// Return the target shape for the given `op`. Return std::nullopt if the
   /// op shouldn't be or cannot be unrolled.
   std::optional<SmallVector<int64_t>> getTargetShape(Operation *op) const {
-    LDBG("");
-    LDBG("Get unroll shape for: " << *op);
+    LDBG() << "Get unroll shape for: " << *op;
 
     if (options.filterConstraint && failed(options.filterConstraint(op))) {
-      LDBG("--no filter constraint -> BAIL");
+      LDBG() << "--no filter constraint -> BAIL";
       return std::nullopt;
     }
 
diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp
index ddd5f2ba1a7b7..cfd7b5b6c22ff 100644
--- a/mlir/lib/Transforms/RemoveDeadValues.cpp
+++ b/mlir/lib/Transforms/RemoveDeadValues.cpp
@@ -51,6 +51,7 @@
 #include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include <cassert>
 #include <cstddef>
 #include <memory>
@@ -59,7 +60,6 @@
 
 #define DEBUG_TYPE "remove-dead-values"
 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 namespace mlir {
 #define GEN_PASS_DEF_REMOVEDEADVALUES
@@ -119,21 +119,21 @@ static bool hasLive(ValueRange values, const DenseSet<Value> &nonLiveSet,
                     RunLivenessAnalysis &la) {
   for (Value value : values) {
     if (nonLiveSet.contains(value)) {
-      LDBG("Value " << value << " is already marked non-live (dead)");
+      LDBG() << "Value " << value << " is already marked non-live (dead)";
       continue;
     }
 
     const Liveness *liveness = la.getLiveness(value);
     if (!liveness) {
-      LDBG("Value " << value
-                    << " has no liveness info, conservatively considered live");
+      LDBG() << "Value " << value
+             << " has no liveness info, conservatively considered live";
       return true;
     }
     if (liveness->isLive) {
-      LDBG("Value " << value << " is live according to liveness analysis");
+      LDBG() << "Value " << value << " is live according to liveness analysis";
       return true;
     } else {
-      LDBG("Value " << value << " is dead according to liveness analysis");
+      LDBG() << "Value " << value << " is dead according to liveness analysis";
     }
   }
   return false;
@@ -148,8 +148,8 @@ static BitVector markLives(ValueRange values, const DenseSet<Value> &nonLiveSet,
   for (auto [index, value] : llvm::enumerate(values)) {
     if (nonLiveSet.contains(value)) {
       lives.reset(index);
-      LDBG("Value " << value << " is already marked non-live (dead) at index "
-                    << index);
+      LDBG() << "Value " << value
+             << " is already marked non-live (dead) at index " << index;
       continue;
     }
 
@@ -161,17 +161,17 @@ static BitVector markLives(ValueRange values, const DenseSet<Value> &nonLiveSet,
     // (because they weren't erased) and also their liveness is null because
     // liveness analysis ran before their creation.
     if (!liveness) {
-      LDBG("Value " << value << " at index " << index
-                    << " has no liveness info, conservatively considered live");
+      LDBG() << "Value " << value << " at index " << index
+             << " has no liveness info, conservatively considered live";
       continue;
     }
     if (!liveness->isLive) {
       lives.reset(index);
-      LDBG("Value " << value << " at index " << index
-                    << " is dead according to liveness analysis");
+      LDBG() << "Value " << value << " at index " << index
+             << " is dead according to liveness analysis";
     } else {
-      LDBG("Value " << value << " at index " << index
-                    << " is live according to liveness analysis");
+      LDBG() << "Value " << value << " at index " << index
+             << " is live according to liveness analysis";
     }
   }
 
@@ -187,8 +187,8 @@ static void collectNonLiveValues(DenseSet<Value> &nonLiveSet, ValueRange range,
     if (!nonLive[index])
       continue;
     nonLiveSet.insert(result);
-    LDBG("Marking value " << result << " as non-live (dead) at index "
-                          << index);
+    LDBG() << "Marking value " << result << " as non-live (dead) at index "
+           << index;
   }
 }
 
@@ -258,16 +258,18 @@ static SmallVector<OpOperand *> operandsToOpOperands(OperandRange operands) {
 static void processSimpleOp(Operation *op, RunLivenessAnalysis &la,
                             DenseSet<Value> &nonLiveSet,
                             RDVFinalCleanupList &cl) {
-  LDBG("Processing simple op: " << *op);
+  LDBG() << "Processing simple op: " << *op;
   if (!isMemoryEffectFree(op) || hasLive(op->getResults(), nonLiveSet, la)) {
-    LDBG("Simple op is not memory effect free or has live results, skipping: "
-         << *op);
+    LDBG()
+        << "Simple op is not memory effect free or has live results, skipping: "
+        << *op;
     return;
   }
 
-  LDBG("Simple op has all dead results and is memory effect free, scheduling "
-       "for removal: "
-       << *op);
+  LDBG()
+      << "Simple op has all dead results and is memory effect free, scheduling "
+         "for removal: "
+      << *op;
   cl.operations.push_back(op);
   collectNonLiveValues(nonLiveSet, op->getResults(),
                        BitVector(op->getNumResults(), true));
@@ -286,10 +288,10 @@ static void processSimpleOp(Operation *op, RunLivenessAnalysis &la,
 static void processFuncOp(FunctionOpInterface funcOp, Operation *module,
                           RunLivenessAnalysis &la, DenseSet<Value> &nonLiveSet,
                           RDVFinalCleanupList &cl) {
-  LDBG("Processing function op: " << funcOp.getOperation()->getName());
+  LDBG() << "Processing function op: " << funcOp.getOperation()->getName();
   if (funcOp.isPublic() || funcOp.isExternal()) {
-    LDBG("Function is public or external, skipping: "
-         << funcOp.getOperation()->getName());
+    LDBG() << "Function is public or external, skipping: "
+           << funcOp.getOperation()->getName();
     return;
   }
 
@@ -697,7 +699,7 @@ static void processRegionBranchOp(RegionBranchOpInterface regionBranchOp,
 static void processBranchOp(BranchOpInterface branchOp, RunLivenessAnalysis &la,
                             DenseSet<Value> &nonLiveSet,
                             RDVFinalCleanupList &cl) {
-  LDBG("Processing branch op: " << *branchOp);
+  LDBG() << "Processing branch op: " << *branchOp;
   unsigned numSuccessors = branchOp->getNumSuccessors();
 
   for (unsigned succIdx = 0; succIdx < numSuccessors; ++succIdx) {
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index f71fcf7ca297b..c6245b637c2a7 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -20,8 +20,6 @@ using namespace mlir::xegpu;
 namespace {
 
 #define DEBUG_TYPE "test-xegpu-unroll"
-#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 struct TestXeGPUUnrollingPatterns
     : public PassWrapper<TestXeGPUUnrollingPatterns,

>From dbc86651428cb2d496a397f92dfddc2acfb51eb6 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jacques+gh at japienaar.info>
Date: Fri, 25 Jul 2025 15:48:33 +0000
Subject: [PATCH 2/2] Fix some missed conversions to the new LDBG macro

---
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   |  5 +--
 .../Linalg/TransformOps/GPUHeuristics.cpp     |  2 +-
 .../Linalg/Transforms/Vectorization.cpp       | 33 +++++++++----------
 mlir/lib/Dialect/SCF/Utils/Utils.cpp          |  4 +--
 4 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
index d73bedd759d0f..05bd917b3e40a 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
@@ -27,7 +27,8 @@
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
+#include "llvm/Support/InterleavedRange.h"
 
 using namespace mlir;
 using namespace mlir::gpu;
@@ -145,7 +146,7 @@ commonLinearIdBuilderFn(int64_t multiplicity = 1,
           mask.createLogicalLinearMappingId(rewriter, scaledLinearIdI64);
       scaledLinearId = arith::IndexCastUIOp::create(
           rewriter, loc, rewriter.getIndexType(), logicalLinearIdI64);
-      LDBG("------adjusting linearId with mask: " << scaledLinearId);
+      LDBG() << "------adjusting linearId with mask: " << scaledLinearId;
     }
 
     // 3. Compute remapped indices.
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp b/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
index dd5476688ca37..c926dfbc4a315 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/GPUHeuristics.cpp
@@ -11,7 +11,7 @@
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include "llvm/Support/InterleavedRange.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index cb6ca7022098a..467a42e808da4 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -486,8 +486,9 @@ Value VectorizationState::getOrCreateMaskFor(
                                   ? true
                                   : std::get<0>(it) == std::get<1>(it);
                      })) {
-      LDBG("Dynamic + static dimensions match vector sizes, masking is not "
-           "required.\n");
+      LDBG()
+          << "Dynamic + static dimensions match vector sizes, masking is not "
+             "required.";
       activeMaskCache[maskingMap] = Value();
       return Value();
     }
@@ -502,7 +503,7 @@ Value VectorizationState::getOrCreateMaskFor(
   // Create the mask based on the dimension values.
   Value mask = vector::CreateMaskOp::create(rewriter, linalgOp.getLoc(),
                                             maskType, upperBounds);
-  LDBG("Creating new mask: " << mask << "\n");
+  LDBG() << "Creating new mask: " << mask;
   activeMaskCache[maskingMap] = mask;
   return mask;
 }
@@ -2145,7 +2146,7 @@ vectorizeAsLinalgContraction(RewriterBase &rewriter, VectorizationState &state,
   Operation *reduceOp = matchLinalgReduction(outOperand);
   auto maybeKind = getCombinerOpKind(reduceOp);
   if (!maybeKind) {
-    LDBG("Failed to determine contraction combining kind.\n");
+    LDBG() << "Failed to determine contraction combining kind.";
     return failure();
   }
 
@@ -2155,7 +2156,7 @@ vectorizeAsLinalgContraction(RewriterBase &rewriter, VectorizationState &state,
   AffineMap lhsMap = linalgOp.getIndexingMapsArray()[0];
   AffineMap rhsMap = linalgOp.getIndexingMapsArray()[1];
   if (getUnusedDimsBitVector({lhsMap, rhsMap}).any()) {
-    LDBG("Contractions with broadcasts are not supported.\n");
+    LDBG() << "Contractions with broadcasts are not supported.";
     return failure();
   }
 
@@ -2650,18 +2651,15 @@ FailureOr<VectorizationResult> mlir::linalg::vectorize(
     ArrayRef<bool> inputScalableVecDims, bool vectorizeNDExtract,
     bool flatten1DDepthwiseConv, bool assumeDynamicDimsMatchVecSizes,
     bool createNamedContraction) {
-  LDBG("Attempting to vectorize:\n" << *op << "\n");
-  LDBG("Input vector sizes: ");
-  LLVM_DEBUG(llvm::interleaveComma(inputVectorSizes, llvm::dbgs()));
-  LLVM_DEBUG(llvm::dbgs() << "\n");
-  LDBG("Input scalable vector dims: ");
-  LLVM_DEBUG(llvm::interleaveComma(inputScalableVecDims, llvm::dbgs()));
-  LLVM_DEBUG(llvm::dbgs() << "\n");
+  LDBG() << "Attempting to vectorize: " << *op;
+  LDBG() << "Input vector sizes: " << llvm::interleaved(inputVectorSizes);
+  LDBG() << "Input scalable vector dims: "
+         << llvm::interleaved(inputScalableVecDims);
 
   if (failed(vectorizeOpPrecondition(op, inputVectorSizes, inputScalableVecDims,
                                      vectorizeNDExtract,
                                      flatten1DDepthwiseConv))) {
-    LDBG("Vectorization pre-conditions failed\n");
+    LDBG() << "Vectorization pre-conditions failed";
     return failure();
   }
 
@@ -2671,7 +2669,7 @@ FailureOr<VectorizationResult> mlir::linalg::vectorize(
     if (failed(state.initState(rewriter, linalgOp, inputVectorSizes,
                                inputScalableVecDims,
                                assumeDynamicDimsMatchVecSizes))) {
-      LDBG("Vectorization state couldn't be initialized\n");
+      LDBG() << "Vectorization state couldn't be initialized";
       return failure();
     }
   }
@@ -2692,7 +2690,7 @@ FailureOr<VectorizationResult> mlir::linalg::vectorize(
                 return success();
               }
 
-              LDBG("Unsupported convolution can't be vectorized.\n");
+              LDBG() << "Unsupported convolution can't be vectorized.";
               return failure();
             }
 
@@ -2701,8 +2699,9 @@ FailureOr<VectorizationResult> mlir::linalg::vectorize(
               return vectorizeAsLinalgContraction(rewriter, state, linalgOp,
                                                   results);
 
-            LDBG("Vectorize generic by broadcasting to the canonical vector "
-                 "shape\n");
+            LDBG()
+                << "Vectorize generic by broadcasting to the canonical vector "
+                   "shape";
 
             // Pre-process before proceeding.
             convertAffineApply(rewriter, linalgOp);
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index e2e2f516bc9f0..5b0c60415a6c4 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -24,14 +24,12 @@
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
 #include <cstdint>
 
 using namespace mlir;
 
 #define DEBUG_TYPE "scf-utils"
-#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
-#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 SmallVector<scf::ForOp> mlir::replaceLoopNestWithNewYields(
     RewriterBase &rewriter, MutableArrayRef<scf::ForOp> loopNest,



More information about the Mlir-commits mailing list