[llvm] [mlir] [Phase 1] full flow (PR #143366)

Mon Jun 9 04:06:05 PDT 2025

https://github.com/amroshahbari27 created https://github.com/llvm/llvm-project/pull/143366

None

>From 72f80b57b2380b313cc46456b910ff945e24847e Mon Sep 17 00:00:00 2001
From: amroshahbari27 <amro.shahbari.1998 at gmail.com>
Date: Mon, 9 Jun 2025 10:59:20 +0000
Subject: [PATCH] [Phase 1] full flow

---
 .gitignore                             |   1 +
 TTL_MLIR_Integration.md                |  68 ++++++++
 mlir/include/mlir/Transforms/Passes.h  |  12 ++
 mlir/include/mlir/Transforms/Passes.td |  26 +++
 mlir/lib/Transforms/CMakeLists.txt     |   3 +
 mlir/lib/Transforms/TTLOps.cpp         | 210 +++++++++++++++++++++++++
 mlir/lib/Transforms/TTLPipeline.cpp    |  61 +++++++
 mlir/lib/Transforms/TTLToEmitC.cpp     |  63 ++++++++
 8 files changed, 444 insertions(+)
 create mode 100644 TTL_MLIR_Integration.md
 create mode 100644 mlir/lib/Transforms/TTLOps.cpp
 create mode 100644 mlir/lib/Transforms/TTLPipeline.cpp
 create mode 100644 mlir/lib/Transforms/TTLToEmitC.cpp

diff --git a/.gitignore b/.gitignore
index a84268a7f6863..4ccd54db32a7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,3 +73,4 @@ pythonenv*
 /clang/utils/analyzer/projects/*/RefScanBuildResults
 # automodapi puts generated documentation files here.
 /lldb/docs/python_api/
+/install
\ No newline at end of file
diff --git a/TTL_MLIR_Integration.md b/TTL_MLIR_Integration.md
new file mode 100644
index 0000000000000..b04f736e34b94
--- /dev/null
+++ b/TTL_MLIR_Integration.md
@@ -0,0 +1,68 @@
+# TTL MLIR Integration
+
+## Project Overview
+This project aims to integrate TTL (Template Tiling Library) with MLIR to create an optimized pipeline from C code to TTL-optimized C code. The pipeline includes affine loop tiling and dialect conversions, with a focus on optimizing operations like sigmoid.
+
+## Current Pipeline
+```
+C code with TTL DSL → MLIR → Optimized MLIR → EmitC → C code
+```
+
+## Technical Implementation
+
+### Version Compatibility
+- Using LLVM 20 for MLIR pipeline
+- Polygeist (C → MLIR) is on LLVM 18
+- Workaround: manually remove the incompatible parts
+- This is a manageable limitation for now
+
+### Type System Integration
+- Minor issue with unrealized conversion casts
+- Can be fixed with a simple pass if needed
+- Not a critical blocker
+
+### TTL Integration Strategy
+Two possible approaches:
+1. Generate direct function calls to TTL's existing functions
+2. Create a TTL dialect (if needed)
+- Currently leaning towards function calls for simplicity
+- Decision pending based on future requirements
+
+## Next Steps
+
+### 1. Frontend Definition
+- Define Polygeist as the frontend
+- Its output will feed into TTL optimizer passes (like tiling)
+- Currently supporting minimal 2D loops and array access
+- Will expand TTL DSL features in the frontend
+
+### 2. Backend Generation
+- Develop pipeline to generate TTL-specific code
+- Focus on efficient memory operations and tiling
+
+### 3. TTL DSL Development
+- Currently minimal: 2D loops and array access
+- Will expand based on requirements
+- Starting with sigmoid as a test case
+
+### 4. Immediate Focus
+- Optimizing sigmoid function
+- Using it as a test case for the complete pipeline
+- Will use learnings to expand to other operations
+
+## Technical Decisions
+- Keeping things simple with function calls rather than a new dialect
+- Managing version compatibility manually for now
+- Type conversion issues are minor and can be addressed if needed
+
+## Current Limitations
+1. Version mismatch between Polygeist and MLIR pipeline
+2. Minimal TTL DSL features in frontend
+3. Focus on sigmoid optimization only
+
+## Future Work
+1. Expand TTL DSL features
+2. Add more optimization passes
+3. Support more complex operations
+4. Evaluate need for TTL dialect
+5. Consider automating version compatibility fixes 
\ No newline at end of file
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 41f208216374f..b0cf7baf2c619 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -46,6 +46,9 @@ class GreedyRewriteConfig;
 #define GEN_PASS_DECL_SYMBOLPRIVATIZE
 #define GEN_PASS_DECL_TOPOLOGICALSORT
 #define GEN_PASS_DECL_COMPOSITEFIXEDPOINTPASS
+#define GEN_PASS_DECL_TTLOPS
+#define GEN_PASS_DECL_TTLPIPELINE
+#define GEN_PASS_DECL_TTLTOEMITC
 #include "mlir/Transforms/Passes.h.inc"
 
 /// Creates an instance of the Canonicalizer pass, configured with default
@@ -65,6 +68,15 @@ createCanonicalizerPass(const GreedyRewriteConfig &config,
                         ArrayRef<std::string> disabledPatterns = std::nullopt,
                         ArrayRef<std::string> enabledPatterns = std::nullopt);
 
+/// Creates a TTL ops pass.
+std::unique_ptr<Pass> createTTLOpsPass();
+
+/// Creates a TTL pipeline pass that runs multiple passes.
+std::unique_ptr<Pass> createTTLPipelinePass();
+
+/// Creates a pass that converts TTL operations to the EmitC dialect.
+std::unique_ptr<Pass> createTTLToEmitC();
+
 /// Creates a pass to perform control-flow sinking.
 std::unique_ptr<Pass> createControlFlowSinkPass();
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 1e89a78912e99..4b74f5f8e3ac8 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -54,6 +54,24 @@ def Canonicalizer : Pass<"canonicalize"> {
   ] # RewritePassUtils.options;
 }
 
+def TTLOps : Pass<"ttl-ops", "ModuleOp"> {
+  let summary = "Report 2-D affine loop bands for TTL";
+  let description = [{
+    Reports the bounds, steps and 2-D memory accesses of two-deep perfect affine loop nests; the IR is not modified.
+  }];
+  let constructor = "mlir::createTTLOpsPass()";
+  let dependentDialects = ["func::FuncDialect"];
+}
+
+def TTLPipeline : Pass<"ttl-pipeline", "ModuleOp"> { // NOTE(review): the C++ pass also has a "cache-size" option that is not declared in this record.
+  let summary = "Run a pipeline of TTL passes";
+  let description = [{
+    This pass runs a sequence of TTL-related passes in a specific order.
+  }];
+  let constructor = "mlir::createTTLPipelinePass()"; // Factory defined in mlir/lib/Transforms/TTLPipeline.cpp.
+  let dependentDialects = ["func::FuncDialect"]; // NOTE(review): the C++ pass derives from PassWrapper, not a generated base - confirm this list is consumed.
+}
+
 def ControlFlowSink : Pass<"control-flow-sink"> {
   let summary = "Sink operations into conditional blocks";
   let description = [{
@@ -586,4 +604,12 @@ def CompositeFixedPointPass : Pass<"composite-fixed-point-pass"> {
   ];
 }
 
+def TTLToEmitC : Pass<"ttl-to-emitc", "ModuleOp"> { // Anchored on ModuleOp to match the OperationPass<ModuleOp> implementation.
+  let summary = "Convert TTL operations to EmitC dialect";
+  let description = [{
+    This pass converts TTL operations to EmitC dialect for C code generation.
+  }];
+  let dependentDialects = ["mlir::emitc::EmitCDialect"];
+}
+
 #endif // MLIR_TRANSFORMS_PASSES
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 3a8088bccf299..863ef531efbd6 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,9 @@
 add_subdirectory(Utils)
 
 add_mlir_library(MLIRTransforms
+  TTLOps.cpp
+  TTLPipeline.cpp
+  TTLToEmitC.cpp
   Canonicalizer.cpp
   CompositePass.cpp
   ControlFlowSink.cpp
diff --git a/mlir/lib/Transforms/TTLOps.cpp b/mlir/lib/Transforms/TTLOps.cpp
new file mode 100644
index 0000000000000..fb2f5a9b3ed3a
--- /dev/null
+++ b/mlir/lib/Transforms/TTLOps.cpp
@@ -0,0 +1,210 @@
+#include "mlir/Pass/Pass.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
+#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::affine;
+
+namespace {
+
+// Summary of one affine.for loop in a band: its constant bounds and step, plus the affine accesses recorded for it.
+struct LoopInfo {
+  // Constant loop bounds and step; zero-initialized so a default-constructed LoopInfo never holds indeterminate values.
+  int64_t lowerBound = 0;
+  int64_t upperBound = 0;
+  int64_t step = 0;
+
+  // Direction of a recorded memory access.
+  enum class AccessType {
+    Load,
+    Store
+  };
+  // One affine load or store.
+  struct MemoryAccess {
+    Value memref;           // The memref being accessed
+    AffineMap accessMap;    // The affine map for the access
+    AccessType type;        // Whether it's a load or store
+  };
+  SmallVector<MemoryAccess> accesses; // Memory accesses recorded for this loop.
+};
+
+// Validates loop bands and gathers per-loop bounds and memory-access information.
+class LoopValidator {
+public:
+  // Returns true when `op` is an affine load/store whose access map has
+  // exactly two results. Any other operation yields false, so a null
+  // AffineMap is never queried when assertions are compiled out.
+  static bool is2DAccess(Operation *op) {
+    AffineMap map;
+    if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
+      map = loadOp.getAffineMap();
+    } else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
+      map = storeOp.getAffineMap();
+    } else {
+      return false;
+    }
+    return map.getNumResults() == 2;
+  }
+
+  // Returns one LoopInfo per loop in `loops`, or std::nullopt unless the band
+  // is a perfectly nested pair of loops with single-constant bounds whose
+  // innermost loop's affine loads and stores are all 2D.
+  static std::optional<SmallVector<LoopInfo>> validateAndCollectInfo(ArrayRef<AffineForOp> loops) {
+    if (loops.size() != 2 || !affine::isPerfectlyNested(loops)) {
+      return std::nullopt;
+    }
+
+    SmallVector<LoopInfo> loopInfos;
+    // Iterate by value: the accessors used below are non-const, so a const
+    // reference would require a const_cast on every call.
+    for (AffineForOp loop : loops) {
+      LoopInfo info;
+
+      // getSingleConstantResult() is only valid on a single-result constant
+      // map, so check exactly that instead of isConstant().
+      AffineMap lowerMap = loop.getLowerBoundMap();
+      AffineMap upperMap = loop.getUpperBoundMap();
+      if (!lowerMap.isSingleConstant() || !upperMap.isSingleConstant()) {
+        return std::nullopt;
+      }
+      info.lowerBound = lowerMap.getSingleConstantResult();
+      info.upperBound = upperMap.getSingleConstantResult();
+      info.step = loop.getStep().getSExtValue();
+
+      // Only collect memory accesses in the innermost loop
+      if (loop == loops.back()) {
+        bool all2D = true;
+        loop->walk([&](Operation *op) {
+          if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
+            if (!is2DAccess(op)) {
+              all2D = false;
+              return;
+            }
+            info.accesses.push_back({loadOp.getMemRef(), loadOp.getAffineMap(), LoopInfo::AccessType::Load});
+          } else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
+            if (!is2DAccess(op)) {
+              all2D = false;
+              return;
+            }
+            info.accesses.push_back({storeOp.getMemRef(), storeOp.getAffineMap(), LoopInfo::AccessType::Store});
+          }
+        });
+
+        // If not all accesses are 2D, return nullopt
+        if (!all2D) {
+          return std::nullopt;
+        }
+      }
+
+      loopInfos.push_back(info);
+    }
+    return loopInfos;
+  }
+};
+
+// Prints the loop bounds of a band and the last loop's memory accesses to stderr; the FuncOp argument is currently unused.
+static void printLoopInfo(const SmallVector<LoopInfo> &loopInfos, func::FuncOp /*funcOp*/) {
+  // back() below is undefined on an empty vector, so an empty band is not reported at all.
+  if (loopInfos.empty())
+    return;
+  llvm::errs() << "\n=== Band Information ===\n";
+  llvm::errs() << "Loop Structure:\n";
+  for (size_t i = 0; i < loopInfos.size(); i++) {
+    const auto &info = loopInfos[i];
+    llvm::errs() << "  Loop " << i << ": [" << info.lowerBound << ", "
+                 << info.upperBound << ") step " << info.step << "\n";
+  }
+
+  // Print only innermost loop's memory accesses
+  llvm::errs() << "\nMemory Accesses in Innermost Loop:\n";
+  const auto &innerLoop = loopInfos.back();
+  for (const auto &access : innerLoop.accesses) {
+    llvm::errs() << "  " << (access.type == LoopInfo::AccessType::Load ? "Load" : "Store") << " from ";
+    // Block arguments are described by type and argument index; other values are printed directly.
+    if (auto blockArg = dyn_cast<BlockArgument>(access.memref)) {
+      llvm::errs() << "<block argument> of type '" << blockArg.getType() 
+                   << "' at index: " << blockArg.getArgNumber()
+                   << " (arg" << blockArg.getArgNumber() << ")";
+    } else {
+      llvm::errs() << access.memref;
+    }
+    llvm::errs() << "\n";
+    llvm::errs() << "    Access Map: " << access.accessMap << "\n";
+  }
+  llvm::errs() << "================================\n";
+}
+
+// Module pass that reports, on stderr, every tileable affine loop band accepted by LoopValidator; the IR is left unchanged.
+struct TTLOps : public PassWrapper<TTLOps, OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLOps)
+
+  TTLOps() = default;
+
+  // Copy constructor - needed for pass cloning
+  TTLOps(const TTLOps &other) : PassWrapper<TTLOps, OperationPass<ModuleOp>>(other) {
+    localMemorySize = other.localMemorySize;
+    loadCost = other.loadCost;
+    storeCost = other.storeCost;
+  }
+
+  // Pass options; none of them is read by runOnOperation() yet.
+  Option<unsigned> localMemorySize{
+      *this, "local-memory-size",
+      llvm::cl::desc("Size of local memory in KB (default: 32)"),
+      llvm::cl::init(32)};
+  Option<unsigned> loadCost{
+      *this, "load-cost",
+      llvm::cl::desc("Cost of a load operation (default: 1)"),
+      llvm::cl::init(1)};
+  Option<unsigned> storeCost{
+      *this, "store-cost",
+      llvm::cl::desc("Cost of a store operation (default: 1)"),
+      llvm::cl::init(1)};
+
+  StringRef getArgument() const override { return "ttl-ops"; }
+  StringRef getDescription() const override { return "TTL operations pass"; }
+
+  // Prints loop-band information for the module's single function; fails the pass for any other module shape.
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+    // Use a diagnostic rather than an assert: an assert vanishes in release builds and leaves `funcOps` unused.
+    auto funcOps = module.getOps<func::FuncOp>();
+    if (std::distance(funcOps.begin(), funcOps.end()) != 1) {
+      module.emitError("Expected exactly one function in the module");
+      return signalPassFailure();
+    }
+
+    // Find perfect loop nests (bands) in each function
+    module->walk([&](func::FuncOp funcOp) {
+      std::vector<SmallVector<AffineForOp, 6>> bands;
+      mlir::affine::getTileableBands(funcOp, &bands);
+
+      // Report every band that passes validation; rejected bands are skipped silently.
+      for (const auto &band : bands) {
+        if (auto loopInfos = LoopValidator::validateAndCollectInfo(band)) {
+          printLoopInfo(*loopInfos, funcOp);
+        }
+      }
+    });
+  }
+};
+
+// Registers TTLOps via PassRegistration. NOTE(review): anonymous-namespace scope and no caller visible in this file - confirm it is used.
+void registerTTLOps() {
+  PassRegistration<TTLOps>();
+}
+} // end anonymous namespace
+
+namespace mlir {
+std::unique_ptr<Pass> createTTLOpsPass() { // Factory returning a new instance of the file-local TTLOps pass.
+  return std::make_unique<TTLOps>();
+}
+} // end namespace mlir
\ No newline at end of file
diff --git a/mlir/lib/Transforms/TTLPipeline.cpp b/mlir/lib/Transforms/TTLPipeline.cpp
new file mode 100644
index 0000000000000..192e3c7c680e8
--- /dev/null
+++ b/mlir/lib/Transforms/TTLPipeline.cpp
@@ -0,0 +1,61 @@
+#include "mlir/Pass/Pass.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Passes.h"
+
+using namespace mlir;
+
+namespace {
+
+// Module pass that tiles affine loops and then lowers to EmitC, scheduling both steps as a dynamic sub-pipeline.
+struct TTLPipeline : public PassWrapper<TTLPipeline, OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLPipeline)
+
+  TTLPipeline() = default;
+  // Copy constructor - needed for pass cloning
+  TTLPipeline(const TTLPipeline &other) : PassWrapper<TTLPipeline, OperationPass<ModuleOp>>(other) {}
+
+  Option<unsigned> cacheSize{
+      *this, "cache-size", llvm::cl::init(32768),
+      llvm::cl::desc("Cache size in bytes for loop tiling (default: 32768)")};
+
+  StringRef getArgument() const override { return "ttl-pipeline"; }
+  StringRef getDescription() const override { return "TTL pipeline"; }
+
+  // Appends the sub-passes to `pm`: per-function loop tiling, then the module-level EmitC lowering.
+  void buildPipeline(OpPassManager &pm) const {
+    pm.addNestedPass<func::FuncOp>(affine::createLoopTilingPass(cacheSize));
+    pm.addPass(createTTLToEmitC());
+  }
+  // Declares the dialects the sub-passes report as dependencies so they are loaded before this pass runs.
+  void getDependentDialects(DialectRegistry &registry) const override {
+    OpPassManager pm(ModuleOp::getOperationName());
+    buildPipeline(pm);
+    pm.getDependentDialects(registry);
+  }
+  // Schedules the sub-passes with runPipeline() instead of running a separate top-level PassManager.
+  void runOnOperation() override {
+    OpPassManager pm(ModuleOp::getOperationName());
+    buildPipeline(pm);
+    if (failed(runPipeline(pm, getOperation())))
+      signalPassFailure();
+  }
+};
+
+// Registers TTLPipeline via PassRegistration. NOTE(review): anonymous-namespace scope and no caller visible in this file - confirm it is used.
+void registerTTLPipeline() {
+  PassRegistration<TTLPipeline>();
+}
+
+} // end anonymous namespace
+
+// Factory for the TTL pipeline pass; defines the function declared in mlir/Transforms/Passes.h.
+std::unique_ptr<Pass> mlir::createTTLPipelinePass() {
+  std::unique_ptr<Pass> pipelinePass = std::make_unique<TTLPipeline>();
+  return pipelinePass;
+}
\ No newline at end of file
diff --git a/mlir/lib/Transforms/TTLToEmitC.cpp b/mlir/lib/Transforms/TTLToEmitC.cpp
new file mode 100644
index 0000000000000..550013cc9e180
--- /dev/null
+++ b/mlir/lib/Transforms/TTLToEmitC.cpp
@@ -0,0 +1,63 @@
+#include "mlir/Pass/Pass.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h"
+#include "mlir/Conversion/ArithToEmitC/ArithToEmitC.h"
+#include "mlir/Conversion/MathToEmitC/MathToEmitC.h"
+#include "mlir/Conversion/MemRefToEmitC/MemRefToEmitC.h"
+#include "mlir/Conversion/ConvertToEmitC/ConvertToEmitCPass.h"
+#include "mlir/Conversion/Passes.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+
+// Module pass that lowers affine code to EmitC by scheduling existing conversion passes as a dynamic sub-pipeline.
+struct TTLToEmitC : public PassWrapper<TTLToEmitC, OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLToEmitC)
+  TTLToEmitC() = default;
+  TTLToEmitC(const TTLToEmitC &other) : PassWrapper<TTLToEmitC, OperationPass<ModuleOp>>(other) {}
+
+  StringRef getArgument() const override { return "ttl-to-emitc"; }
+  StringRef getDescription() const override { return "Convert TTL operations to EmitC dialect"; }
+
+  // Appends, in order: affine lowering, the EmitC conversions, canonicalization (all per function), then module-level cast reconciliation.
+  static void buildPipeline(OpPassManager &pm) {
+    pm.addNestedPass<func::FuncOp>(createLowerAffinePass());
+    pm.addNestedPass<func::FuncOp>(createConvertArithToEmitC());
+    pm.addNestedPass<func::FuncOp>(createConvertMathToEmitC());
+    pm.addNestedPass<func::FuncOp>(createConvertMemRefToEmitC());
+    pm.addNestedPass<func::FuncOp>(createSCFToEmitC());
+    pm.addNestedPass<func::FuncOp>(createConvertToEmitC());
+    pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+    pm.addPass(createReconcileUnrealizedCastsPass());
+  }
+  // Declares the dialects the sub-passes report as dependencies so they are loaded before this pass runs.
+  void getDependentDialects(DialectRegistry &registry) const override {
+    OpPassManager pm(ModuleOp::getOperationName());
+    buildPipeline(pm);
+    pm.getDependentDialects(registry);
+  }
+  // Schedules the sub-passes with runPipeline() instead of running a separate top-level PassManager.
+  void runOnOperation() override {
+    OpPassManager pm(ModuleOp::getOperationName());
+    buildPipeline(pm);
+    if (failed(runPipeline(pm, getOperation())))
+      signalPassFailure();
+  }
+};
+
+void registerTTLToEmitC() { // NOTE(review): anonymous-namespace scope and no caller visible in this file - confirm it is used.
+  PassRegistration<TTLToEmitC>(); // Registers the TTLToEmitC pass.
+}
+
+} // end anonymous namespace
+
+// Factory for the TTL-to-EmitC pass; defines the function declared in mlir/Transforms/Passes.h.
+std::unique_ptr<Pass> mlir::createTTLToEmitC() {
+  std::unique_ptr<Pass> emitCPass = std::make_unique<TTLToEmitC>();
+  return emitCPass;
+}
\ No newline at end of file