[flang-commits] [flang] [flang] Add nsw to DO loop parameters (PR #113854)

Sun Oct 27 20:12:37 PDT 2024

https://github.com/yus3710-fj created https://github.com/llvm/llvm-project/pull/113854

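This patch adds the `nsw` (no signed wrap) flag to the integer arithmetic generated for DO loop parameters (the initial, terminal, and increment expressions), unless integer wrap-around is enabled in the lowering options.
This can help vectorization in some cases, such as #110609.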

See also the discussion in https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/20.

From aa5b6f507d506f949481d4c5616f76fa9a12705f Mon Sep 17 00:00:00 2001
From: Yusuke MINATO <minato.yusuke at fujitsu.com>
Date: Tue, 8 Oct 2024 15:24:01 +0900
Subject: [PATCH] [flang] add nsw to DO loop parameters

---
 flang/lib/Lower/Bridge.cpp              |  8 +++
 flang/test/Lower/HLFIR/goto-do-body.f90 |  4 +-
 flang/test/Lower/goto-do-body.f90       |  4 +-
 flang/test/Lower/nsw.f90                | 95 +++++++++++++++++++++++++
 4 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a3bd1ace11da21..ecc81d211ee827 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2131,14 +2131,22 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
     assert(!incrementLoopNestInfo.empty() && "empty loop nest");
     mlir::Location loc = toLocation();
+    mlir::arith::IntegerOverflowFlags iofBackup{};
     for (IncrementLoopInfo &info : incrementLoopNestInfo) {
       info.loopVariable =
           genLoopVariableAddress(loc, *info.loopVariableSym, info.isUnordered);
+      if (!getLoweringOptions().getIntegerWrapAround()) {
+        iofBackup = builder->getIntegerOverflowFlags();
+        builder->setIntegerOverflowFlags(
+            mlir::arith::IntegerOverflowFlags::nsw);
+      }
       mlir::Value lowerValue = genControlValue(info.lowerExpr, info);
       mlir::Value upperValue = genControlValue(info.upperExpr, info);
       bool isConst = true;
       mlir::Value stepValue = genControlValue(
           info.stepExpr, info, info.isStructured() ? nullptr : &isConst);
+      if (!getLoweringOptions().getIntegerWrapAround())
+        builder->setIntegerOverflowFlags(iofBackup);
       // Use a temp variable for unstructured loops with non-const step.
       if (!isConst) {
         info.stepVariable = builder->createTemporary(loc, stepValue.getType());
diff --git a/flang/test/Lower/HLFIR/goto-do-body.f90 b/flang/test/Lower/HLFIR/goto-do-body.f90
index 5f5b09ccb8f7dc..9e2c07f8fa292d 100644
--- a/flang/test/Lower/HLFIR/goto-do-body.f90
+++ b/flang/test/Lower/HLFIR/goto-do-body.f90
@@ -83,8 +83,8 @@ subroutine sub2()
 
   do i = 1, 2, 3 * j - 8
 ! CHECK:    %[[TMP2:.*]] = fir.load %[[J]]#0 : !fir.ref<i32>
-! CHECK:    %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] : i32
-! CHECK:    %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] : i32
+! CHECK:    %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] overflow<nsw> : i32
+! CHECK:    %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] overflow<nsw> : i32
 ! CHECK:    fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
 ! CHECK:    %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[C_7]] : i32
 ! CHECK:    %[[TMP5:.*]] = arith.divsi %[[TMP4]], %[[STEP]] : i32
diff --git a/flang/test/Lower/goto-do-body.f90 b/flang/test/Lower/goto-do-body.f90
index 89e4a7a64a87ba..880417c888104e 100644
--- a/flang/test/Lower/goto-do-body.f90
+++ b/flang/test/Lower/goto-do-body.f90
@@ -90,9 +90,9 @@ subroutine sub2()
 ! CHECK:    %[[C2_2:.*]] = arith.constant 2 : i32
 ! CHECK:    %[[C3_2:.*]] = arith.constant 3 : i32
 ! CHECK:    %[[TMP2:.*]] = fir.load %[[J]] : !fir.ref<i32>
-! CHECK:    %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] : i32
+! CHECK:    %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] overflow<nsw> : i32
 ! CHECK:    %[[C8_1:.*]] = arith.constant 8 : i32
-! CHECK:    %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] : i32
+! CHECK:    %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] overflow<nsw> : i32
 ! CHECK:    fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
 ! CHECK:    %[[TMP4:.*]] = arith.subi %[[C2_2]], %[[C1_1]] : i32
 ! CHECK:    %[[TMP5:.*]] = arith.addi %[[TMP4]], %[[STEP]] : i32
diff --git a/flang/test/Lower/nsw.f90 b/flang/test/Lower/nsw.f90
index 84435b71330427..4ee9e5da829e61 100644
--- a/flang/test/Lower/nsw.f90
+++ b/flang/test/Lower/nsw.f90
@@ -59,3 +59,98 @@ subroutine bitwise_comparison(a, b)
 ! CHECK-LABEL:   func.func @_QPbitwise_comparison(
 ! CHECK-NOT: overflow<nsw>
 ! CHECK:           return
+
+subroutine loop_params(a,lb,ub,st)
+  integer :: i, lb, ub, st
+  integer :: a(lb:ub)
+  do i = lb+1, ub-1, st*2
+    a(i) = i
+  end do
+end subroutine
+! CHECK-LABEL:   func.func @_QPloop_params(
+! CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_9:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_10:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_12:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK:           %[[VAL_13:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_14:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           %[[VAL_16:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:           %[[VAL_25:.*]] = arith.addi %[[VAL_14]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK:           %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i32) -> index
+! CHECK:           %[[VAL_27:.*]] = arith.subi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK:           %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
+! CHECK:           %[[VAL_29:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK:           %[[VAL_30:.*]] = arith.muli %[[VAL_29]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK:           %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> index
+! CHECK:           %[[VAL_32:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
+! CHECK:           %[[VAL_33:.*]]:2 = fir.do_loop %[[VAL_34:.*]] = %[[VAL_26]] to %[[VAL_28]] step %[[VAL_31]] iter_args(%[[VAL_35:.*]] = %[[VAL_32]]) -> (index, i32) {
+
+subroutine loop_params2(a,lb,ub,st)
+  integer :: i, lb, ub, st
+  integer :: a(lb:ub)
+  real :: ii
+  do ii = lb+1, ub-1, st*2
+    i = ii
+    a(i) = i
+  end do
+end subroutine
+! CHECK-LABEL:   func.func @_QPloop_params2(
+! CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_8:.*]] = fir.alloca index
+! CHECK:           %[[VAL_9:.*]] = fir.alloca f32
+! CHECK:           %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK:           %[[VAL_16:.*]] = fir.declare %{{.*}}ii"} : (!fir.ref<f32>) -> !fir.ref<f32>
+! CHECK:           %[[VAL_17:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_18:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:           %[[VAL_20:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK:           %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK:           %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> f32
+! CHECK:           %[[VAL_31:.*]] = arith.subi %[[VAL_20]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK:           %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> f32
+! CHECK:           %[[VAL_33:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK:           %[[VAL_34:.*]] = arith.muli %[[VAL_33]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK:           %[[VAL_35:.*]] = fir.convert %[[VAL_34]] : (i32) -> f32
+! CHECK:           fir.store %[[VAL_35]] to %[[VAL_9]] : !fir.ref<f32>
+! CHECK:           %[[VAL_36:.*]] = arith.subf %[[VAL_32]], %[[VAL_30]] fastmath<contract> : f32
+! CHECK:           %[[VAL_37:.*]] = arith.addf %[[VAL_36]], %[[VAL_35]] fastmath<contract> : f32
+! CHECK:           %[[VAL_38:.*]] = arith.divf %[[VAL_37]], %[[VAL_35]] fastmath<contract> : f32
+! CHECK:           %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (f32) -> index
+! CHECK:           fir.store %[[VAL_39]] to %[[VAL_8]] : !fir.ref<index>
+! CHECK:           fir.store %[[VAL_30]] to %[[VAL_16]] : !fir.ref<f32>
+! CHECK:           cf.br ^bb1
+! CHECK:         ^bb1:
+! CHECK:           %[[VAL_40:.*]] = fir.load %[[VAL_8]] : !fir.ref<index>
+! CHECK:           %[[VAL_41:.*]] = arith.cmpi sgt, %[[VAL_40]], %[[VAL_6]] : index
+! CHECK:           cf.cond_br %[[VAL_41]], ^bb2, ^bb3
+! CHECK:         ^bb2:
+
+subroutine loop_params3(a,lb,ub,st)
+  integer :: i, lb, ub, st
+  integer :: a(lb:ub)
+  do concurrent (i=lb+1:ub-1:st*2)
+    a(i) = i
+  end do
+end subroutine
+! CHECK-LABEL:   func.func @_QPloop_params3(
+! CHECK:           %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK:           %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK:           %[[VAL_15:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK:           %[[VAL_16:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK:           %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK:           %[[VAL_27:.*]] = arith.addi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK:           %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
+! CHECK:           %[[VAL_29:.*]] = arith.subi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK:           %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> index
+! CHECK:           %[[VAL_31:.*]] = fir.load %[[VAL_15]] : !fir.ref<i32>
+! CHECK:           %[[VAL_32:.*]] = arith.muli %[[VAL_31]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK:           %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> index
+! CHECK:           fir.do_loop %[[VAL_34:.*]] = %[[VAL_28]] to %[[VAL_30]] step %[[VAL_33]] unordered {