[flang-commits] [flang] [flang] Add nsw to DO loop parameters (PR #113854)
Yusuke MINATO via flang-commits
flang-commits at lists.llvm.org
Sun Oct 27 20:12:37 PDT 2024
https://github.com/yus3710-fj created https://github.com/llvm/llvm-project/pull/113854
nsw is added to DO loop parameters (initial parameters, terminal parameters, and incrementation parameters).
This can help vectorization in some cases like #110609.
See also the discussion in https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/20.
>From aa5b6f507d506f949481d4c5616f76fa9a12705f Mon Sep 17 00:00:00 2001
From: Yusuke MINATO <minato.yusuke at fujitsu.com>
Date: Tue, 8 Oct 2024 15:24:01 +0900
Subject: [PATCH] [flang] add nsw to DO loop parameters
---
flang/lib/Lower/Bridge.cpp | 8 +++
flang/test/Lower/HLFIR/goto-do-body.f90 | 4 +-
flang/test/Lower/goto-do-body.f90 | 4 +-
flang/test/Lower/nsw.f90 | 95 +++++++++++++++++++++++++
4 files changed, 107 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a3bd1ace11da21..ecc81d211ee827 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2131,14 +2131,22 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
mlir::Location loc = toLocation();
+ mlir::arith::IntegerOverflowFlags iofBackup{};
for (IncrementLoopInfo &info : incrementLoopNestInfo) {
info.loopVariable =
genLoopVariableAddress(loc, *info.loopVariableSym, info.isUnordered);
+ if (!getLoweringOptions().getIntegerWrapAround()) {
+ iofBackup = builder->getIntegerOverflowFlags();
+ builder->setIntegerOverflowFlags(
+ mlir::arith::IntegerOverflowFlags::nsw);
+ }
mlir::Value lowerValue = genControlValue(info.lowerExpr, info);
mlir::Value upperValue = genControlValue(info.upperExpr, info);
bool isConst = true;
mlir::Value stepValue = genControlValue(
info.stepExpr, info, info.isStructured() ? nullptr : &isConst);
+ if (!getLoweringOptions().getIntegerWrapAround())
+ builder->setIntegerOverflowFlags(iofBackup);
// Use a temp variable for unstructured loops with non-const step.
if (!isConst) {
info.stepVariable = builder->createTemporary(loc, stepValue.getType());
diff --git a/flang/test/Lower/HLFIR/goto-do-body.f90 b/flang/test/Lower/HLFIR/goto-do-body.f90
index 5f5b09ccb8f7dc..9e2c07f8fa292d 100644
--- a/flang/test/Lower/HLFIR/goto-do-body.f90
+++ b/flang/test/Lower/HLFIR/goto-do-body.f90
@@ -83,8 +83,8 @@ subroutine sub2()
do i = 1, 2, 3 * j - 8
! CHECK: %[[TMP2:.*]] = fir.load %[[J]]#0 : !fir.ref<i32>
-! CHECK: %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] : i32
-! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] : i32
+! CHECK: %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] overflow<nsw> : i32
+! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] overflow<nsw> : i32
! CHECK: fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
! CHECK: %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[C_7]] : i32
! CHECK: %[[TMP5:.*]] = arith.divsi %[[TMP4]], %[[STEP]] : i32
diff --git a/flang/test/Lower/goto-do-body.f90 b/flang/test/Lower/goto-do-body.f90
index 89e4a7a64a87ba..880417c888104e 100644
--- a/flang/test/Lower/goto-do-body.f90
+++ b/flang/test/Lower/goto-do-body.f90
@@ -90,9 +90,9 @@ subroutine sub2()
! CHECK: %[[C2_2:.*]] = arith.constant 2 : i32
! CHECK: %[[C3_2:.*]] = arith.constant 3 : i32
! CHECK: %[[TMP2:.*]] = fir.load %[[J]] : !fir.ref<i32>
-! CHECK: %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] : i32
+! CHECK: %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] overflow<nsw> : i32
! CHECK: %[[C8_1:.*]] = arith.constant 8 : i32
-! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] : i32
+! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] overflow<nsw> : i32
! CHECK: fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
! CHECK: %[[TMP4:.*]] = arith.subi %[[C2_2]], %[[C1_1]] : i32
! CHECK: %[[TMP5:.*]] = arith.addi %[[TMP4]], %[[STEP]] : i32
diff --git a/flang/test/Lower/nsw.f90 b/flang/test/Lower/nsw.f90
index 84435b71330427..4ee9e5da829e61 100644
--- a/flang/test/Lower/nsw.f90
+++ b/flang/test/Lower/nsw.f90
@@ -59,3 +59,98 @@ subroutine bitwise_comparison(a, b)
! CHECK-LABEL: func.func @_QPbitwise_comparison(
! CHECK-NOT: overflow<nsw>
! CHECK: return
+
+subroutine loop_params(a,lb,ub,st)
+ integer :: i, lb, ub, st
+ integer :: a(lb:ub)
+ do i = lb+1, ub-1, st*2
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPloop_params(
+! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_14]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i32) -> index
+! CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = arith.muli %[[VAL_29]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> index
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
+! CHECK: %[[VAL_33:.*]]:2 = fir.do_loop %[[VAL_34:.*]] = %[[VAL_26]] to %[[VAL_28]] step %[[VAL_31]] iter_args(%[[VAL_35:.*]] = %[[VAL_32]]) -> (index, i32) {
+
+subroutine loop_params2(a,lb,ub,st)
+ integer :: i, lb, ub, st
+ integer :: a(lb:ub)
+ real :: ii
+ do ii = lb+1, ub-1, st*2
+ i = ii
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPloop_params2(
+! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_8:.*]] = fir.alloca index
+! CHECK: %[[VAL_9:.*]] = fir.alloca f32
+! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.declare %{{.*}}ii"} : (!fir.ref<f32>) -> !fir.ref<f32>
+! CHECK: %[[VAL_17:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> f32
+! CHECK: %[[VAL_31:.*]] = arith.subi %[[VAL_20]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> f32
+! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK: %[[VAL_34:.*]] = arith.muli %[[VAL_33]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_34]] : (i32) -> f32
+! CHECK: fir.store %[[VAL_35]] to %[[VAL_9]] : !fir.ref<f32>
+! CHECK: %[[VAL_36:.*]] = arith.subf %[[VAL_32]], %[[VAL_30]] fastmath<contract> : f32
+! CHECK: %[[VAL_37:.*]] = arith.addf %[[VAL_36]], %[[VAL_35]] fastmath<contract> : f32
+! CHECK: %[[VAL_38:.*]] = arith.divf %[[VAL_37]], %[[VAL_35]] fastmath<contract> : f32
+! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (f32) -> index
+! CHECK: fir.store %[[VAL_39]] to %[[VAL_8]] : !fir.ref<index>
+! CHECK: fir.store %[[VAL_30]] to %[[VAL_16]] : !fir.ref<f32>
+! CHECK: cf.br ^bb1
+! CHECK: ^bb1:
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_8]] : !fir.ref<index>
+! CHECK: %[[VAL_41:.*]] = arith.cmpi sgt, %[[VAL_40]], %[[VAL_6]] : index
+! CHECK: cf.cond_br %[[VAL_41]], ^bb2, ^bb3
+! CHECK: ^bb2:
+
+subroutine loop_params3(a,lb,ub,st)
+ integer :: i, lb, ub, st
+ integer :: a(lb:ub)
+ do concurrent (i=lb+1:ub-1:st*2)
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPloop_params3(
+! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
+! CHECK: %[[VAL_29:.*]] = arith.subi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> index
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]] : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = arith.muli %[[VAL_31]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> index
+! CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_28]] to %[[VAL_30]] step %[[VAL_33]] unordered {
More information about the flang-commits
mailing list