[Mlir-commits] [mlir] [mlir] Fix region simplification bug when later blocks use prior block argument values (PR #97960)
Ben Howe
llvmlistbot at llvm.org
Mon Aug 5 18:04:43 PDT 2024
================
@@ -0,0 +1,49 @@
+// Baseline check
+// RUN: mlir-opt %s --convert-func-to-llvm --convert-cf-to-llvm | \
+// RUN: mlir-cpu-runner -e nested_loop --entry-point-result=i32 | FileCheck %s
+
+// Region simplification check
+// RUN: mlir-opt %s \
+// RUN: --canonicalize='enable-patterns=AnyPattern region-simplify=aggressive' \
+// RUN: --convert-func-to-llvm --convert-cf-to-llvm | mlir-cpu-runner \
+// RUN: -e nested_loop --entry-point-result=i32 | FileCheck %s
+
+func.func @nested_loop() -> i32 {
+ %c3_i64 = arith.constant 3 : i64
+ %c2_i64 = arith.constant 2 : i64
+ %c0_i64 = arith.constant 0 : i64
+ %c1_i64 = arith.constant 1 : i64
+ %c1_i32 = arith.constant 1 : i32
+ %c0_i32 = arith.constant 0 : i32
+ cf.br ^bb1(%c0_i32, %c0_i64 : i32, i64) // enter outer loop: count = 0, outer index = 0
+^bb1(%0: i32, %1: i64): // 2 preds: ^bb0, ^bb8
+ %2 = arith.cmpi ult, %1, %c2_i64 : i64 // outer loop continues while outer index < 2 (unsigned)
+ cf.cond_br %2, ^bb2(%0, %1 : i32, i64), ^bb9(%0, %1 : i32, i64)
+^bb2(%3: i32, %4: i64): // pred: ^bb1
+ %5 = arith.addi %4, %c1_i64 : i64 // inner index starts at outer index + 1
+ cf.br ^bb3(%3, %5 : i32, i64)
+^bb3(%6: i32, %7: i64): // 2 preds: ^bb2, ^bb5
+ %8 = arith.cmpi ult, %7, %c3_i64 : i64 // inner loop continues while inner index < 3 (unsigned)
+ cf.cond_br %8, ^bb4(%6, %7 : i32, i64), ^bb6(%6, %7 : i32, i64)
+^bb4(%9: i32, %10: i64): // pred: ^bb3
+ %11 = arith.addi %9, %c1_i32 : i32 // count += 1
+ cf.br ^bb5(%11, %10 : i32, i64)
+^bb5(%12: i32, %13: i64): // pred: ^bb4
+ %14 = arith.addi %13, %c1_i64 : i64 // inner index += 1
+ cf.br ^bb3(%12, %14 : i32, i64)
+^bb6(%15: i32, %16: i64): // pred: ^bb3
+ cf.br ^bb7
+^bb7: // pred: ^bb6
+ cf.br ^bb8(%15, %4 : i32, i64) // forwards %15 (arg of ^bb6) and %4 (arg of ^bb2): block arguments of earlier blocks used in a later block
+^bb8(%17: i32, %18: i64): // pred: ^bb7
+ %19 = arith.addi %18, %c1_i64 : i64 // outer index += 1
+ cf.br ^bb1(%17, %19 : i32, i64)
+^bb9(%20: i32, %21: i64): // pred: ^bb1
+ cf.br ^bb10
+^bb10: // pred: ^bb9
+ return %20 : i32 // final count (uses ^bb9's argument from the following block)
+}
+
+// If region simplification behaves correctly (by NOT merging ^bb2 and ^bb5),
+// this will be 3.
+// CHECK: 3
----------------
bmhowe23 wrote:
FWIW, here is the smallest example I can construct that shows the problem in `main` and demonstrates that it is fixed with the changes in this PR:
```mlir
// Minimal reproducer: a two-level counting loop written as an explicit CFG.
// The outer index takes the values 0 and 1; for each, the inner index runs
// from (outer index + 1) up to, but not including, 3, and every inner
// iteration adds 1 to the running count. The function returns that count:
// 2 + 1 = 3.
func.func @nested_loop() -> i32 {
%c0_i32 = arith.constant 0 : i32
%c1_i32 = arith.constant 1 : i32
%c2_i32 = arith.constant 2 : i32
%c3_i32 = arith.constant 3 : i32
// Enter the outer loop with count = 0 and outer index = 0.
cf.br ^bb1(%c0_i32, %c0_i32 : i32, i32)
// Outer loop header: %0 = running count, %1 = outer index.
^bb1(%0: i32, %1: i32):
// Continue while outer index < 2 (unsigned); otherwise return the count.
%2 = arith.cmpi ult, %1, %c2_i32 : i32
cf.cond_br %2, ^bb2(%0, %1 : i32, i32), ^bb7(%0 : i32)
// Outer loop body entry: %3 = count, %4 = outer index.
^bb2(%3: i32, %4: i32):
// The inner index starts at outer index + 1.
%5 = arith.addi %4, %c1_i32 : i32
cf.br ^bb3(%3, %5 : i32, i32)
// Inner loop header: %6 = count, %7 = inner index.
^bb3(%6: i32, %7: i32):
%8 = arith.cmpi ult, %7, %c3_i32 : i32
// On inner-loop exit the outer index is forwarded as %4, which is a block
// argument of ^bb2 used here from a later block.
cf.cond_br %8, ^bb4(%6, %7 : i32, i32), ^bb6(%6, %4 : i32, i32)
// Inner loop body: increment the count.
^bb4(%9: i32, %10: i32):
%11 = arith.addi %9, %c1_i32 : i32
cf.br ^bb5(%11, %10 : i32, i32)
// Inner loop latch: increment the inner index and branch back to ^bb3.
^bb5(%12: i32, %13: i32):
%14 = arith.addi %13, %c1_i32 : i32
cf.br ^bb3(%12, %14 : i32, i32)
// Outer loop latch: increment the outer index and branch back to ^bb1.
^bb6(%15: i32, %16: i32):
%17 = arith.addi %16, %c1_i32 : i32
cf.br ^bb1(%15, %17 : i32, i32)
// Exit: return the accumulated count.
^bb7(%18: i32):
return %18 : i32
}
```
If you place the above into `test.mlir` and run it with the following command, the latest `main` returns `2` (incorrect), while the version with this fix returns the expected `3`.
```bash
$ build/bin/mlir-opt --canonicalize='enable-patterns=AnyPattern region-simplify=aggressive' --convert-func-to-llvm --convert-cf-to-llvm test.mlir | build/bin/mlir-cpu-runner -e nested_loop --entry-point-result=i32
```
In this example, the problem reported in the original issue occurs when `^bb2` and `^bb5` are merged.
https://github.com/llvm/llvm-project/pull/97960
More information about the Mlir-commits
mailing list