[llvm] [mlir] [Phase 1] full flow (PR #143366)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 9 04:07:10 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-core
Author: Amro Shahbari (amroshahbari27)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/143366.diff
8 Files Affected:
- (modified) .gitignore (+1)
- (added) TTL_MLIR_Integration.md (+68)
- (modified) mlir/include/mlir/Transforms/Passes.h (+12)
- (modified) mlir/include/mlir/Transforms/Passes.td (+26)
- (modified) mlir/lib/Transforms/CMakeLists.txt (+3)
- (added) mlir/lib/Transforms/TTLOps.cpp (+210)
- (added) mlir/lib/Transforms/TTLPipeline.cpp (+61)
- (added) mlir/lib/Transforms/TTLToEmitC.cpp (+63)
``````````diff
diff --git a/.gitignore b/.gitignore
index a84268a7f6863..4ccd54db32a7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,3 +73,4 @@ pythonenv*
/clang/utils/analyzer/projects/*/RefScanBuildResults
# automodapi puts generated documentation files here.
/lldb/docs/python_api/
+/install
\ No newline at end of file
diff --git a/TTL_MLIR_Integration.md b/TTL_MLIR_Integration.md
new file mode 100644
index 0000000000000..b04f736e34b94
--- /dev/null
+++ b/TTL_MLIR_Integration.md
@@ -0,0 +1,68 @@
+# TTL MLIR Integration
+
+## Project Overview
+This project aims to integrate TTL (Template Tiling Library) with MLIR to create an optimized pipeline from C code to TTL-optimized C code. The pipeline includes affine loop tiling and dialect conversions, with a focus on optimizing operations like sigmoid.
+
+## Current Pipeline
+```
+C code with TTL DSL → MLIR → Optimized MLIR → EmitC → C code
+```
+
+## Technical Implementation
+
+### Version Compatibility
+- Using LLVM 20 for MLIR pipeline
+- Polygeist (C → MLIR) is on LLVM 18
+- Solution: Manually removing incompatible parts
+- This is a manageable limitation for now
+
+### Type System Integration
+- Minor issue with unrealized conversion casts
+- Can be fixed with a simple pass if needed
+- Not a critical blocker
+
+### TTL Integration Strategy
+Two possible approaches:
+1. Generate direct function calls to TTL's existing functions
+2. Create a TTL dialect (if needed)
+- Currently leaning towards function calls for simplicity
+- Decision pending based on future requirements
+
+## Next Steps
+
+### 1. Frontend Definition
+- Define Polygeist as the frontend
+- Its output will feed into TTL optimizer passes (like tiling)
+- Currently supporting minimal 2D loops and array access
+- Will expand TTL DSL features in the frontend
+
+### 2. Backend Generation
+- Develop pipeline to generate TTL-specific code
+- Focus on efficient memory operations and tiling
+
+### 3. TTL DSL Development
+- Currently minimal: 2D loops and array access
+- Will expand based on requirements
+- Starting with sigmoid as a test case
+
+### 4. Immediate Focus
+- Optimizing sigmoid function
+- Using it as a test case for the complete pipeline
+- Will use learnings to expand to other operations
+
+## Technical Decisions
+- Keeping things simple with function calls rather than new dialect
+- Managing version compatibility manually for now
+- Type conversion issues are minor and can be addressed if needed
+
+## Current Limitations
+1. Version mismatch between Polygeist and MLIR pipeline
+2. Minimal TTL DSL features in frontend
+3. Focus on sigmoid optimization only
+
+## Future Work
+1. Expand TTL DSL features
+2. Add more optimization passes
+3. Support more complex operations
+4. Evaluate need for TTL dialect
+5. Consider automating version compatibility fixes
\ No newline at end of file
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 41f208216374f..b0cf7baf2c619 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -46,6 +46,9 @@ class GreedyRewriteConfig;
#define GEN_PASS_DECL_SYMBOLPRIVATIZE
#define GEN_PASS_DECL_TOPOLOGICALSORT
#define GEN_PASS_DECL_COMPOSITEFIXEDPOINTPASS
+#define GEN_PASS_DECL_TTLOPS
+#define GEN_PASS_DECL_TTLPIPELINE
+#define GEN_PASS_DECL_TTLTOEMITC
#include "mlir/Transforms/Passes.h.inc"
/// Creates an instance of the Canonicalizer pass, configured with default
@@ -65,6 +68,15 @@ createCanonicalizerPass(const GreedyRewriteConfig &config,
ArrayRef<std::string> disabledPatterns = std::nullopt,
ArrayRef<std::string> enabledPatterns = std::nullopt);
+/// Creates a TTL ops pass.
+std::unique_ptr<Pass> createTTLOpsPass();
+
+/// Creates a TTL pipeline pass that runs multiple passes.
+std::unique_ptr<Pass> createTTLPipelinePass();
+
+/// Creates a pass that converts TTL operations to the EmitC dialect.
+std::unique_ptr<Pass> createTTLToEmitC();
+
/// Creates a pass to perform control-flow sinking.
std::unique_ptr<Pass> createControlFlowSinkPass();
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 1e89a78912e99..4b74f5f8e3ac8 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -54,6 +54,24 @@ def Canonicalizer : Pass<"canonicalize"> {
] # RewritePassUtils.options;
}
+// Declares the module-level `ttl-ops` pass; instances are created through
+// mlir::createTTLOpsPass().
+// NOTE(review): the C++ implementation added in TTLOps.cpp only inspects
+// affine loop bands and prints a summary to stderr; it performs no conversion.
+// Confirm whether the summary/description below should say so.
+def TTLOps : Pass<"ttl-ops", "ModuleOp"> {
+ let summary = "Convert TTL operations to MLIR";
+ let description = [{
+ This pass converts TTL operations to their MLIR equivalents.
+ }];
+ let constructor = "mlir::createTTLOpsPass()";
+ let dependentDialects = ["func::FuncDialect"];
+}
+
+// Declares the module-level `ttl-pipeline` pass; instances are created through
+// mlir::createTTLPipelinePass().
+// NOTE(review): only the func dialect is listed as dependent, while the
+// implementation in TTLPipeline.cpp schedules affine loop tiling and the
+// TTL-to-EmitC lowering — presumably more dialects are needed; confirm.
+def TTLPipeline : Pass<"ttl-pipeline", "ModuleOp"> {
+ let summary = "Run a pipeline of TTL passes";
+ let description = [{
+ This pass runs a sequence of TTL-related passes in a specific order.
+ }];
+ let constructor = "mlir::createTTLPipelinePass()";
+ let dependentDialects = ["func::FuncDialect"];
+}
+
def ControlFlowSink : Pass<"control-flow-sink"> {
let summary = "Sink operations into conditional blocks";
let description = [{
@@ -586,4 +604,12 @@ def CompositeFixedPointPass : Pass<"composite-fixed-point-pass"> {
];
}
+// Declares the `ttl-to-emitc` pass. It is anchored on the module because the
+// C++ implementation is an OperationPass<ModuleOp> that schedules nested
+// func.func passes followed by a module-level cast reconciliation. The
+// constructor is spelled out explicitly, like the other TTL passes in this
+// file, and refers to the hand-written mlir::createTTLToEmitC().
+def TTLToEmitC : Pass<"ttl-to-emitc", "ModuleOp"> {
+  let summary = "Convert TTL operations to EmitC dialect";
+  let description = [{
+    This pass converts TTL operations to EmitC dialect for C code generation.
+  }];
+  let constructor = "mlir::createTTLToEmitC()";
+  let dependentDialects = ["mlir::emitc::EmitCDialect"];
+}
+
#endif // MLIR_TRANSFORMS_PASSES
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 3a8088bccf299..863ef531efbd6 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,9 @@
add_subdirectory(Utils)
add_mlir_library(MLIRTransforms
+ TTLOps.cpp
+ TTLPipeline.cpp
+ TTLToEmitC.cpp
Canonicalizer.cpp
CompositePass.cpp
ControlFlowSink.cpp
diff --git a/mlir/lib/Transforms/TTLOps.cpp b/mlir/lib/Transforms/TTLOps.cpp
new file mode 100644
index 0000000000000..fb2f5a9b3ed3a
--- /dev/null
+++ b/mlir/lib/Transforms/TTLOps.cpp
@@ -0,0 +1,210 @@
+#include "mlir/Pass/Pass.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
+#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::affine;
+
+namespace {
+
+/// Summary of one affine.for loop: its constant bounds and step, plus the
+/// affine loads/stores recorded for it. The scalar members are
+/// default-initialized so a freshly constructed LoopInfo never carries
+/// indeterminate values.
+struct LoopInfo {
+  /// Constant lower bound of the loop.
+  int64_t lowerBound = 0;
+  /// Constant upper bound of the loop.
+  int64_t upperBound = 0;
+  /// Loop step.
+  int64_t step = 0;
+
+  /// Direction of a recorded memory access.
+  enum class AccessType {
+    Load,
+    Store
+  };
+
+  /// One affine load or store. The member order is relied upon by
+  /// brace-initialization at the collection sites.
+  struct MemoryAccess {
+    Value memref;        // The memref being accessed
+    AffineMap accessMap; // The affine map used for the access
+    AccessType type;     // Whether it is a load or a store
+  };
+
+  /// Memory accesses recorded for this loop (may be empty).
+  SmallVector<MemoryAccess> accesses;
+};
+
+/// Stateless helper that decides whether a band of affine.for loops has the
+/// shape this pass handles and, if so, summarises it as LoopInfo records.
+class LoopValidator {
+public:
+  /// Returns true when `op` is an affine load or store whose access map has
+  /// exactly two results.
+  ///
+  /// Passing any other operation is a caller error: it trips the assertion in
+  /// asserts-enabled builds and returns false otherwise, rather than querying
+  /// a default-constructed (null) AffineMap.
+  static bool is2DAccess(Operation *op) {
+    if (auto loadOp = dyn_cast<AffineLoadOp>(op))
+      return loadOp.getAffineMap().getNumResults() == 2;
+    if (auto storeOp = dyn_cast<AffineStoreOp>(op))
+      return storeOp.getAffineMap().getNumResults() == 2;
+    assert(false && "Expected load or store operation");
+    return false;
+  }
+
+  /// Validates a loop band and collects per-loop information.
+  ///
+  /// Returns std::nullopt unless all of the following hold:
+  ///   * the band consists of exactly two perfectly nested loops,
+  ///   * every loop bound is a single compile-time constant,
+  ///   * every affine load/store under the innermost loop is a 2D access.
+  /// On success the result holds one LoopInfo per loop, outermost first; only
+  /// the innermost loop's entry carries memory accesses.
+  static std::optional<SmallVector<LoopInfo>> validateAndCollectInfo(ArrayRef<AffineForOp> loops) {
+    if (loops.size() != 2 || !affine::isPerfectlyNested(loops))
+      return std::nullopt;
+
+    SmallVector<LoopInfo> loopInfos;
+
+    // Op classes are cheap value handles, so iterate by value; this gives a
+    // mutable handle for the accessors without any const_cast.
+    for (AffineForOp loop : loops) {
+      AffineMap lowerMap = loop.getLowerBoundMap();
+      AffineMap upperMap = loop.getUpperBoundMap();
+
+      // isConstant() also accepts maps with several constant results (min/max
+      // bounds), for which getSingleConstantResult() would assert. Require a
+      // single constant result instead.
+      if (!lowerMap.isSingleConstant() || !upperMap.isSingleConstant())
+        return std::nullopt;
+
+      LoopInfo info;
+      info.lowerBound = lowerMap.getSingleConstantResult();
+      info.upperBound = upperMap.getSingleConstantResult();
+      info.step = loop.getStep().getSExtValue();
+
+      // Only collect memory accesses in the innermost loop.
+      if (loop == loops.back()) {
+        WalkResult walkResult = loop->walk([&](Operation *op) -> WalkResult {
+          if (!isa<AffineLoadOp, AffineStoreOp>(op))
+            return WalkResult::advance();
+
+          // The band is rejected as soon as one access is not 2D, so there
+          // is no point in visiting the remaining operations.
+          if (!is2DAccess(op))
+            return WalkResult::interrupt();
+
+          if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
+            info.accesses.push_back({loadOp.getMemRef(), loadOp.getAffineMap(), LoopInfo::AccessType::Load});
+          } else {
+            auto storeOp = cast<AffineStoreOp>(op);
+            info.accesses.push_back({storeOp.getMemRef(), storeOp.getAffineMap(), LoopInfo::AccessType::Store});
+          }
+          return WalkResult::advance();
+        });
+
+        if (walkResult.wasInterrupted())
+          return std::nullopt;
+      }
+
+      loopInfos.push_back(info);
+    }
+
+    return loopInfos;
+  }
+};
+
+/// Writes a human-readable description of a validated band to stderr: one line
+/// per loop with its bounds and step, followed by every memory access recorded
+/// for the last (innermost) entry. `loopInfos` must not be empty. `funcOp` is
+/// accepted for interface compatibility and is not used.
+static void printLoopInfo(const SmallVector<LoopInfo> &loopInfos, func::FuncOp funcOp) {
+  llvm::raw_ostream &os = llvm::errs();
+  os << "\n=== Band Information ===\n";
+
+  // Loop structure, outermost first.
+  os << "Loop Structure:\n";
+  size_t loopIndex = 0;
+  for (const LoopInfo &loop : loopInfos) {
+    os << " Loop " << loopIndex << ": [" << loop.lowerBound << ", "
+       << loop.upperBound << ") step " << loop.step << "\n";
+    ++loopIndex;
+  }
+
+  // Memory accesses are reported for the innermost loop only.
+  os << "\nMemory Accesses in Innermost Loop:\n";
+  for (const LoopInfo::MemoryAccess &access : loopInfos.back().accesses) {
+    const char *kind = "Store";
+    if (access.type == LoopInfo::AccessType::Load)
+      kind = "Load";
+    os << " " << kind << " from ";
+
+    // Block arguments get a descriptive form; any other value is printed
+    // with its regular textual representation.
+    auto blockArg = dyn_cast<BlockArgument>(access.memref);
+    if (!blockArg) {
+      os << access.memref;
+    } else {
+      os << "<block argument> of type '" << blockArg.getType()
+         << "' at index: " << blockArg.getArgNumber()
+         << " (arg" << blockArg.getArgNumber() << ")";
+    }
+    os << "\n";
+    os << " Access Map: " << access.accessMap << "\n";
+  }
+  os << "================================\n";
+}
+
+/// Module-level pass ("ttl-ops") that finds tileable affine loop bands in the
+/// module's single function and prints a summary of every band accepted by
+/// LoopValidator to stderr. The code in this pass only inspects and reports.
+struct TTLOps : public PassWrapper<TTLOps, OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLOps)
+
+  // Default constructor
+  TTLOps() = default;
+
+  // Copy constructor - needed for pass cloning
+  TTLOps(const TTLOps &other) : PassWrapper<TTLOps, OperationPass<ModuleOp>>(other) {
+    // Copy option values
+    localMemorySize = other.localMemorySize;
+    loadCost = other.loadCost;
+    storeCost = other.storeCost;
+  }
+
+  // Pass options. They are declared here but not read by runOnOperation().
+  Option<unsigned> localMemorySize{
+      *this, "local-memory-size",
+      llvm::cl::desc("Size of local memory in KB (default: 32)"),
+      llvm::cl::init(32)};
+  Option<unsigned> loadCost{
+      *this, "load-cost",
+      llvm::cl::desc("Cost of a load operation (default: 1)"),
+      llvm::cl::init(1)};
+  Option<unsigned> storeCost{
+      *this, "store-cost",
+      llvm::cl::desc("Cost of a store operation (default: 1)"),
+      llvm::cl::init(1)};
+
+  StringRef getArgument() const override { return "ttl-ops"; }
+  StringRef getDescription() const override { return "TTL operations pass"; }
+
+  /// Reports every valid 2D band found in the module's function. Fails the
+  /// pass with a diagnostic when the module does not contain exactly one
+  /// func.func.
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+
+    // The number of functions depends on the input IR, not on an internal
+    // invariant, so report a diagnostic instead of asserting: an assert would
+    // abort debug builds and be silently skipped in release builds.
+    auto funcOps = module.getOps<func::FuncOp>();
+    if (std::distance(funcOps.begin(), funcOps.end()) != 1) {
+      module.emitError("Expected exactly one function in the module");
+      signalPassFailure();
+      return;
+    }
+
+    // Find perfect loop nests (bands) in each function
+    module->walk([&](func::FuncOp funcOp) {
+      std::vector<SmallVector<AffineForOp, 6>> bands;
+      mlir::affine::getTileableBands(funcOp, &bands);
+
+      // Bands rejected by the validator are skipped without any output.
+      for (const auto &band : bands) {
+        if (auto loopInfos = LoopValidator::validateAndCollectInfo(band)) {
+          printLoopInfo(*loopInfos, funcOp);
+        }
+      }
+    });
+  }
+};
+
+// Registers the TTLOps pass through PassRegistration.
+// NOTE(review): this function is defined inside the anonymous namespace, so it
+// has internal linkage, and no call to it is visible in this file — confirm it
+// is actually reachable, or remove it.
+void registerTTLOps() {
+ PassRegistration<TTLOps>();
+}
+} // end anonymous namespace
+
+namespace mlir {
+/// Factory for the TTLOps pass: returns a newly allocated, default-constructed
+/// instance owned by the caller.
+std::unique_ptr<Pass> createTTLOpsPass() {
+ return std::make_unique<TTLOps>();
+}
+} // end namespace mlir
\ No newline at end of file
diff --git a/mlir/lib/Transforms/TTLPipeline.cpp b/mlir/lib/Transforms/TTLPipeline.cpp
new file mode 100644
index 0000000000000..192e3c7c680e8
--- /dev/null
+++ b/mlir/lib/Transforms/TTLPipeline.cpp
@@ -0,0 +1,61 @@
+#include "mlir/Pass/Pass.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Passes.h"
+
+using namespace mlir;
+
+namespace {
+
+/// Module-level pass ("ttl-pipeline") that builds a fresh PassManager on every
+/// run and executes affine loop tiling on each func.func, followed by the
+/// TTL-to-EmitC pass on the module.
+struct TTLPipeline : public PassWrapper<TTLPipeline, OperationPass<ModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLPipeline)
+
+ // Default constructor
+ TTLPipeline() = default;
+
+ // Copy constructor - needed for pass cloning
+ // NOTE(review): cacheSize is not copied here; presumably the pass
+ // infrastructure copies option values when cloning — confirm.
+ TTLPipeline(const TTLPipeline &other) : PassWrapper<TTLPipeline, OperationPass<ModuleOp>>(other) {}
+
+ // Pass options
+ // Forwarded unchanged to affine::createLoopTilingPass() in runOnOperation().
+ Option<unsigned> cacheSize{
+ *this, "cache-size",
+ llvm::cl::desc("Cache size in bytes for loop tiling (default: 32768)"),
+ llvm::cl::init(32768)};
+
+ StringRef getArgument() const override { return "ttl-pipeline"; }
+ StringRef getDescription() const override { return "TTL pipeline"; }
+
+ // Runs the nested pipeline on the current module and marks this pass as
+ // failed if the nested run fails.
+ // NOTE(review): this constructs and runs a separate PassManager from inside
+ // a pass; MLIR also provides Pass::runPipeline for dynamically scheduled
+ // pipelines — consider whether that fits better here.
+ void runOnOperation() override {
+ ModuleOp module = getOperation();
+ PassManager pm(module->getContext());
+
+ // Add function-level passes using addNestedPass
+ pm.addNestedPass<func::FuncOp>(affine::createLoopTilingPass(cacheSize));
+
+ // Add module-level passes
+ pm.addPass(createTTLToEmitC());
+
+ // Run the pipeline
+ if (failed(pm.run(module))) {
+ signalPassFailure();
+ }
+ }
+};
+
+// Registers the TTLPipeline pass through PassRegistration.
+// NOTE(review): this function is defined inside the anonymous namespace, so it
+// has internal linkage, and no call to it is visible in this file — confirm it
+// is actually reachable, or remove it.
+void registerTTLPipeline() {
+ PassRegistration<TTLPipeline>();
+}
+
+} // end anonymous namespace
+
+namespace mlir {
+/// Factory for the TTLPipeline pass: returns a newly allocated,
+/// default-constructed instance owned by the caller.
+std::unique_ptr<Pass> createTTLPipelinePass() {
+ return std::make_unique<TTLPipeline>();
+}
+} // end namespace mlir
\ No newline at end of file
diff --git a/mlir/lib/Transforms/TTLToEmitC.cpp b/mlir/lib/Transforms/TTLToEmitC.cpp
new file mode 100644
index 0000000000000..550013cc9e180
--- /dev/null
+++ b/mlir/lib/Transforms/TTLToEmitC.cpp
@@ -0,0 +1,63 @@
+#include "mlir/Pass/Pass.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/SCFToEmitC/SCFToEmitC.h"
+#include "mlir/Conversion/ArithToEmitC/ArithToEmitC.h"
+#include "mlir/Conversion/MathToEmitC/MathToEmitC.h"
+#include "mlir/Conversion/MemRefToEmitC/MemRefToEmitC.h"
+#include "mlir/Conversion/ConvertToEmitC/ConvertToEmitCPass.h"
+#include "mlir/Conversion/Passes.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+
+/// Module-level pass ("ttl-to-emitc") that builds a fresh PassManager on every
+/// run and lowers each func.func towards the EmitC dialect: affine lowering
+/// first, then the individual *-to-EmitC conversions, canonicalization, and
+/// finally a module-level reconciliation of unrealized casts.
+struct TTLToEmitC : public PassWrapper<TTLToEmitC, OperationPass<ModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLToEmitC)
+
+ TTLToEmitC() = default;
+ // Copy constructor; this pass declares no options, so only the base is copied.
+ TTLToEmitC(const TTLToEmitC &other) : PassWrapper<TTLToEmitC, OperationPass<ModuleOp>>(other) {}
+
+ StringRef getArgument() const override { return "ttl-to-emitc"; }
+ StringRef getDescription() const override { return "Convert TTL operations to EmitC dialect"; }
+
+ // Runs the lowering pipeline on the current module and marks this pass as
+ // failed if the nested run fails. Passes run in the order they are added.
+ void runOnOperation() override {
+ ModuleOp module = getOperation();
+ PassManager pm(module->getContext());
+
+ // First convert Affine to SCF, MemRef, etc...
+ pm.addNestedPass<func::FuncOp>(createLowerAffinePass());
+
+ // Then convert all dialects to EmitC
+ pm.addNestedPass<func::FuncOp>(createConvertArithToEmitC());
+ pm.addNestedPass<func::FuncOp>(createConvertMathToEmitC());
+ pm.addNestedPass<func::FuncOp>(createConvertMemRefToEmitC());
+ pm.addNestedPass<func::FuncOp>(createSCFToEmitC());
+ pm.addNestedPass<func::FuncOp>(createConvertToEmitC());
+
+ // Clean up passes
+ pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
+
+ // Reconcile unrealized casts must run at module level
+ pm.addPass(createReconcileUnrealizedCastsPass());
+
+ if (failed(pm.run(module))) {
+ signalPassFailure();
+ }
+ }
+};
+
+// Registers the TTLToEmitC pass through PassRegistration.
+// NOTE(review): this function is defined inside the anonymous namespace, so it
+// has internal linkage, and no call to it is visible in this file — confirm it
+// is actually reachable, or remove it.
+void registerTTLToEmitC() {
+ PassRegistration<TTLToEmitC>();
+}
+
+} // end anonymous namespace
+
+namespace mlir {
+/// Factory for the TTLToEmitC pass: returns a newly allocated,
+/// default-constructed instance owned by the caller.
+std::unique_ptr<Pass> createTTLToEmitC() {
+ return std::make_unique<TTLToEmitC>();
+}
+} // end namespace mlir
\ No newline at end of file
``````````
</details>
https://github.com/llvm/llvm-project/pull/143366
More information about the llvm-commits
mailing list