[llvm] [mlir] add ch1~ch3 note (PR #170624)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 00:11:20 PST 2025


https://github.com/hellozmz created https://github.com/llvm/llvm-project/pull/170624

None

>From 0adefd3592874ac8c137d1141e3280e24cb9ff24 Mon Sep 17 00:00:00 2001
From: hellozmz <407190054 at qq.com>
Date: Thu, 4 Dec 2025 08:09:31 +0000
Subject: [PATCH] add ch1~ch3 note

---
 build.sh                                  | 17 +++++++++++++++++
 mlir/examples/toy/Ch1/include/toy/AST.h   | 19 ++++++++++---------
 mlir/examples/toy/Ch1/include/toy/Lexer.h | 16 ++++++++--------
 mlir/examples/toy/Ch1/toyc.cpp            |  6 +++---
 mlir/examples/toy/Ch2/compile.sh          |  5 +++++
 mlir/examples/toy/Ch2/include/toy/Ops.td  |  1 +
 mlir/examples/toy/Ch2/toyc.cpp            | 15 ++++++++-------
 mlir/examples/toy/Ch3/toyc.cpp            |  2 +-
 mlir/test/Examples/Toy/Ch1/test.sh        |  7 +++++++
 mlir/test/Examples/Toy/Ch2/invalid.mlir   |  3 ++-
 mlir/test/Examples/Toy/Ch2/test.sh        |  9 +++++++++
 mlir/test/Examples/Toy/Ch3/test.sh        | 20 ++++++++++++++++++++
 12 files changed, 91 insertions(+), 29 deletions(-)
 create mode 100644 build.sh
 create mode 100644 mlir/examples/toy/Ch2/compile.sh
 create mode 100644 mlir/test/Examples/Toy/Ch1/test.sh
 create mode 100644 mlir/test/Examples/Toy/Ch2/test.sh
 create mode 100644 mlir/test/Examples/Toy/Ch3/test.sh

diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000000000..29b576aaee0fc
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,17 @@
+mkdir llvm-project/build    # NOTE(review): relative path; assumes the working directory contains llvm-project/
+cd llvm-project/build
+cmake -G Ninja ../llvm \
+   -DLLVM_ENABLE_PROJECTS=mlir \
+   -DLLVM_BUILD_EXAMPLES=ON \
+   -DLLVM_TARGETS_TO_BUILD="Native;" \
+   -DCMAKE_BUILD_TYPE=Release \
+   -DLLVM_ENABLE_ASSERTIONS=ON
+# Using clang and lld speeds up the build, we recommend adding:
+#  -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DLLVM_ENABLE_LLD=ON
+# CCache can drastically speed up further rebuilds, try adding:
+#  -DLLVM_CCACHE_BUILD=ON
+# Optionally, using ASAN/UBSAN can find bugs early in development, enable with:
+# -DLLVM_USE_SANITIZER="Address;Undefined"
+# Optionally, enabling integration tests as well
+# -DMLIR_INCLUDE_INTEGRATION_TESTS=ON
+cmake --build . --target check-mlir
diff --git a/mlir/examples/toy/Ch1/include/toy/AST.h b/mlir/examples/toy/Ch1/include/toy/AST.h
index d2ba101dea5a4..b91254dd188f3 100644
--- a/mlir/examples/toy/Ch1/include/toy/AST.h
+++ b/mlir/examples/toy/Ch1/include/toy/AST.h
@@ -31,18 +31,19 @@ struct VarType {
   std::vector<int64_t> shape;
 };
 
+// Source code --> lexical analysis (Lexer) --> syntax analysis (Parser) --> AST --> semantic analysis --> intermediate code generation
 /// Base class for all expression nodes.
-class ExprAST {
+class ExprAST {       // Abstract Syntax Tree (AST)
 public:
   enum ExprASTKind {
-    Expr_VarDecl,
-    Expr_Return,
-    Expr_Num,
-    Expr_Literal,
-    Expr_Var,
-    Expr_BinOp,
-    Expr_Call,
-    Expr_Print,
+    Expr_VarDecl,     // variable declaration
+    Expr_Return,      // return statement
+    Expr_Num,         // number
+    Expr_Literal,     // literal
+    Expr_Var,         // variable reference
+    Expr_BinOp,       // binary operation
+    Expr_Call,        // function call
+    Expr_Print,       // built-in print
   };
 
   ExprAST(ExprASTKind kind, Location location)
diff --git a/mlir/examples/toy/Ch1/include/toy/Lexer.h b/mlir/examples/toy/Ch1/include/toy/Lexer.h
index d420a7ebbf3b6..f5bb729a159c7 100644
--- a/mlir/examples/toy/Ch1/include/toy/Lexer.h
+++ b/mlir/examples/toy/Ch1/include/toy/Lexer.h
@@ -140,13 +140,13 @@ class Lexer {
       while (isalnum((lastChar = Token(getNextChar()))) || lastChar == '_')
         identifierStr += (char)lastChar;
 
-      if (identifierStr == "return")
+      if (identifierStr == "return")      // return statement
         return tok_return;
-      if (identifierStr == "def")
+      if (identifierStr == "def")         // function definition
         return tok_def;
-      if (identifierStr == "var")
+      if (identifierStr == "var")         // variable definition
         return tok_var;
-      return tok_identifier;
+      return tok_identifier;              // identifier
     }
 
     // Number: [0-9.]+
@@ -158,10 +158,10 @@ class Lexer {
       } while (isdigit(lastChar) || lastChar == '.');
 
       numVal = strtod(numStr.c_str(), nullptr);
-      return tok_number;
+      return tok_number;                  // number
     }
 
-    if (lastChar == '#') {
+    if (lastChar == '#') {                // comment
       // Comment until end of line.
       do {
         lastChar = Token(getNextChar());
@@ -173,12 +173,12 @@ class Lexer {
 
     // Check for end of file.  Don't eat the EOF.
     if (lastChar == EOF)
-      return tok_eof;
+      return tok_eof;                     // end of file
 
     // Otherwise, just return the character as its ascii value.
     Token thisChar = Token(lastChar);
     lastChar = Token(getNextChar());
-    return thisChar;
+    return thisChar;                      // any other character
   }
 
   /// The last token read from the input.
diff --git a/mlir/examples/toy/Ch1/toyc.cpp b/mlir/examples/toy/Ch1/toyc.cpp
index fb7b484a92fb9..d9064411934ec 100644
--- a/mlir/examples/toy/Ch1/toyc.cpp
+++ b/mlir/examples/toy/Ch1/toyc.cpp
@@ -31,7 +31,7 @@ static cl::opt<std::string> inputFilename(cl::Positional,
                                           cl::init("-"),
                                           cl::value_desc("filename"));
 namespace {
-enum Action { None, DumpAST };
+enum Action { None, DumpAST };    // Two actions: do nothing, or dump the AST
 } // namespace
 
 static cl::opt<enum Action>
@@ -47,8 +47,8 @@ std::unique_ptr<toy::ModuleAST> parseInputFile(llvm::StringRef filename) {
     return nullptr;
   }
   auto buffer = fileOrErr.get()->getBuffer();
-  LexerBuffer lexer(buffer.begin(), buffer.end(), std::string(filename));
-  Parser parser(lexer);
+  LexerBuffer lexer(buffer.begin(), buffer.end(), std::string(filename));   // lexical analysis
+  Parser parser(lexer);       // syntax analysis
   return parser.parseModule();
 }
 
diff --git a/mlir/examples/toy/Ch2/compile.sh b/mlir/examples/toy/Ch2/compile.sh
new file mode 100644
index 0000000000000..877106d551915
--- /dev/null
+++ b/mlir/examples/toy/Ch2/compile.sh
@@ -0,0 +1,5 @@
+
+build_root=/mnt/data01/zmz/workspace/07ascendnpu/llvm/llvm-project/build  # NOTE(review): machine-specific absolute path; adjust for the local build tree
+mlir_src_root=/mnt/data01/zmz/workspace/07ascendnpu/llvm/llvm-project/mlir  # NOTE(review): machine-specific absolute path; adjust for the local checkout
+
+${build_root}/bin/mlir-tblgen -gen-op-defs ${mlir_src_root}/examples/toy/Ch2/include/toy/Ops.td -I ${mlir_src_root}/include/  # run mlir-tblgen with -gen-op-defs on the Ch2 Ops.td
\ No newline at end of file
diff --git a/mlir/examples/toy/Ch2/include/toy/Ops.td b/mlir/examples/toy/Ch2/include/toy/Ops.td
index 91bf83a54df1a..e07930560fe91 100644
--- a/mlir/examples/toy/Ch2/include/toy/Ops.td
+++ b/mlir/examples/toy/Ch2/include/toy/Ops.td
@@ -78,6 +78,7 @@ def ConstantOp : Toy_Op<"constant", [Pure]> {
     }]>,
 
     // Build a constant with a given constant floating-point value.
+    // The builder below is only declared here, with no body; its implementation is in Dialect.cpp.
     OpBuilder<(ins "double":$value)>
   ];
 
diff --git a/mlir/examples/toy/Ch2/toyc.cpp b/mlir/examples/toy/Ch2/toyc.cpp
index e33b49b41c5a1..07572d690149b 100644
--- a/mlir/examples/toy/Ch2/toyc.cpp
+++ b/mlir/examples/toy/Ch2/toyc.cpp
@@ -50,7 +50,7 @@ static cl::opt<enum InputType> inputType(
                           "load the input file as an MLIR file")));
 
 namespace {
-enum Action { None, DumpAST, DumpMLIR };
+enum Action { None, DumpAST, DumpMLIR };    // Adds the DumpMLIR option: the source can now be dumped as an AST or as MLIR (the latter is the key point).
 } // namespace
 static cl::opt<enum Action> emitAction(
     "emit", cl::desc("Select the kind of output desired"),
@@ -71,14 +71,14 @@ std::unique_ptr<toy::ModuleAST> parseInputFile(llvm::StringRef filename) {
   return parser.parseModule();
 }
 
-int dumpMLIR() {
+int dumpMLIR() {                                        // MLIR dump logic
   mlir::MLIRContext context;
   // Load our Dialect in this MLIR Context.
-  context.getOrLoadDialect<mlir::toy::ToyDialect>();
+  context.getOrLoadDialect<mlir::toy::ToyDialect>();    // load ToyDialect, a fully custom dialect
 
   // Handle '.toy' input to the compiler.
   if (inputType != InputType::MLIR &&
-      !llvm::StringRef(inputFilename).ends_with(".mlir")) {
+      !llvm::StringRef(inputFilename).ends_with(".mlir")) {     // handle non-MLIR input, i.e. a .toy file
     auto moduleAST = parseInputFile(inputFilename);
     if (!moduleAST)
       return 6;
@@ -102,13 +102,13 @@ int dumpMLIR() {
   llvm::SourceMgr sourceMgr;
   sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc());
   mlir::OwningOpRef<mlir::ModuleOp> module =
-      mlir::parseSourceFile<mlir::ModuleOp>(sourceMgr, &context);
+      mlir::parseSourceFile<mlir::ModuleOp>(sourceMgr, &context);             // parse the MLIR file
   if (!module) {
     llvm::errs() << "Error can't load file " << inputFilename << "\n";
     return 3;
   }
 
-  module->dump();
+  module->dump();                                     // print the MLIR intermediate representation
   return 0;
 }
 
@@ -130,7 +130,8 @@ int main(int argc, char **argv) {
   // Register any command line options.
   mlir::registerAsmPrinterCLOptions();
   mlir::registerMLIRContextCLOptions();
-  cl::ParseCommandLineOptions(argc, argv, "toy compiler\n");
+  // example: toyc-ch2 %s -emit=mlir
+  cl::ParseCommandLineOptions(argc, argv, "toy compiler\n");      // assigns values to inputType and emitAction
 
   switch (emitAction) {
   case Action::DumpAST:
diff --git a/mlir/examples/toy/Ch3/toyc.cpp b/mlir/examples/toy/Ch3/toyc.cpp
index f8aa846582267..e31972c8eaa11 100644
--- a/mlir/examples/toy/Ch3/toyc.cpp
+++ b/mlir/examples/toy/Ch3/toyc.cpp
@@ -125,7 +125,7 @@ int dumpMLIR() {
       return 4;
 
     // Add a run of the canonicalizer to optimize the mlir module.
-    pm.addNestedPass<mlir::toy::FuncOp>(mlir::createCanonicalizerPass());
+    pm.addNestedPass<mlir::toy::FuncOp>(mlir::createCanonicalizerPass());   // getCanonicalizationPatterns
     if (mlir::failed(pm.run(*module)))
       return 4;
   }
diff --git a/mlir/test/Examples/Toy/Ch1/test.sh b/mlir/test/Examples/Toy/Ch1/test.sh
new file mode 100644
index 0000000000000..b1e46cc27d97b
--- /dev/null
+++ b/mlir/test/Examples/Toy/Ch1/test.sh
@@ -0,0 +1,7 @@
+export PATH=/mnt/data01/zmz/workspace/07ascendnpu/llvm/llvm-project/build/bin/:$PATH  # NOTE(review): machine-specific absolute path; adjust for the local build tree
+
+toyc-ch1 ./empty.toy -emit=ast 2>&1 | FileCheck ./empty.toy
+
+toyc-ch1 ./ast.toy -emit=ast 2>&1 | FileCheck ./ast.toy
+
+toyc-ch1 ./ast.toy -emit=ast
\ No newline at end of file
diff --git a/mlir/test/Examples/Toy/Ch2/invalid.mlir b/mlir/test/Examples/Toy/Ch2/invalid.mlir
index b3ff353f93041..bddaa2ef41bf3 100644
--- a/mlir/test/Examples/Toy/Ch2/invalid.mlir
+++ b/mlir/test/Examples/Toy/Ch2/invalid.mlir
@@ -5,5 +5,6 @@
 // - toy.print should take an argument.
 // - There should be a block terminator.
 toy.func @main() {
-  %0 = "toy.print"()  : () -> tensor<2x3xf64>
+  // %0 = "toy.print"()  : () -> tensor<2x3xf64>
+  %0 = "toy.print"() // : () -> tensor<2x3xf64>
 }
diff --git a/mlir/test/Examples/Toy/Ch2/test.sh b/mlir/test/Examples/Toy/Ch2/test.sh
new file mode 100644
index 0000000000000..b398e9384a084
--- /dev/null
+++ b/mlir/test/Examples/Toy/Ch2/test.sh
@@ -0,0 +1,9 @@
+export PATH=/mnt/data01/zmz/workspace/07ascendnpu/llvm/llvm-project/build/bin/:$PATH  # NOTE(review): machine-specific absolute path; adjust for the local build tree
+
+toyc-ch2 ./invalid.mlir -emit=mlir 2>&1 
+
+toyc-ch2 ./scalar.toy -emit=mlir 2>&1 # | FileCheck ./scalar.toy
+
+toyc-ch2 ./codegen.toy -emit=mlir -mlir-print-debuginfo 2>&1 # | FileCheck ./codegen.toy
+
+# toyc-ch2 ./codegen.toy -emit=error 2>&1 # | FileCheck ./codegen.toy
diff --git a/mlir/test/Examples/Toy/Ch3/test.sh b/mlir/test/Examples/Toy/Ch3/test.sh
new file mode 100644
index 0000000000000..eacd3566a2a97
--- /dev/null
+++ b/mlir/test/Examples/Toy/Ch3/test.sh
@@ -0,0 +1,20 @@
+export PATH=/mnt/data01/zmz/workspace/07ascendnpu/llvm/llvm-project/build/bin/:$PATH  # NOTE(review): machine-specific absolute path; adjust for the local build tree
+
+# toyc-ch3 ./invalid.mlir -emit=mlir  2>&1 
+
+toyc-ch3 ./scalar.toy -emit=mlir 2>&1 # | FileCheck ./scalar.toy
+toyc-ch3 ./scalar.toy -emit=mlir 2>&1 | FileCheck ./scalar.toy
+
+# toyc-ch3 ./codegen.toy -emit=mlir -mlir-print-debuginfo 2>&1 # | FileCheck ./codegen.toy
+
+# toyc-ch3 ./codegen.toy -emit=error 2>&1 # | FileCheck ./codegen.toy
+
+toyc-ch3 ./codegen.toy -emit=mlir 2>&1 | FileCheck ./codegen.toy
+
+toyc-ch3 ./transpose_transpose.toy -emit=mlir -opt       # with the optimization option added, SimplifyRedundantTranspose can run and eliminate the two transposes
+
+toyc-ch3 ./transpose_transpose.toy -emit=mlir
+
+toyc-ch3 ./trivial_reshape.toy -emit=mlir
+toyc-ch3 ./trivial_reshape.toy -emit=mlir -opt
+



More information about the llvm-commits mailing list