[flang-commits] [flang] e573c6b - [flang] Add nsw to DO loop parameters (#113854)
via flang-commits
flang-commits at lists.llvm.org
Wed Nov 27 15:58:12 PST 2024
Author: Yusuke MINATO
Date: 2024-11-28T08:58:09+09:00
New Revision: e573c6b67eb729a625431121139100bebc61ba1f
URL: https://github.com/llvm/llvm-project/commit/e573c6b67eb729a625431121139100bebc61ba1f
DIFF: https://github.com/llvm/llvm-project/commit/e573c6b67eb729a625431121139100bebc61ba1f.diff
LOG: [flang] Add nsw to DO loop parameters (#113854)
nsw is added to DO loop parameters (initial parameters, terminal
parameters, and incrementation parameters).
This can help vectorization in some cases like #110609.
See also the discussion in
https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/20.
Added:
Modified:
flang/lib/Lower/Bridge.cpp
flang/test/Lower/HLFIR/goto-do-body.f90
flang/test/Lower/goto-do-body.f90
flang/test/Lower/nsw.f90
Removed:
################################################################################
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 17b58604da3bf6..77003eff190e26 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2137,6 +2137,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
mlir::Location loc = toLocation();
mlir::Operation *boundsAndStepIP = nullptr;
+ mlir::arith::IntegerOverflowFlags iofBackup{};
for (IncrementLoopInfo &info : incrementLoopNestInfo) {
mlir::Value lowerValue;
@@ -2153,11 +2154,18 @@ class FirConverter : public Fortran::lower::AbstractConverter {
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isUnordered);
+ if (!getLoweringOptions().getIntegerWrapAround()) {
+ iofBackup = builder->getIntegerOverflowFlags();
+ builder->setIntegerOverflowFlags(
+ mlir::arith::IntegerOverflowFlags::nsw);
+ }
lowerValue = genControlValue(info.lowerExpr, info);
upperValue = genControlValue(info.upperExpr, info);
bool isConst = true;
stepValue = genControlValue(info.stepExpr, info,
info.isStructured() ? nullptr : &isConst);
+ if (!getLoweringOptions().getIntegerWrapAround())
+ builder->setIntegerOverflowFlags(iofBackup);
boundsAndStepIP = stepValue.getDefiningOp();
// Use a temp variable for unstructured loops with non-const step.
diff --git a/flang/test/Lower/HLFIR/goto-do-body.f90 b/flang/test/Lower/HLFIR/goto-do-body.f90
index 383b839e591e33..5e4bc2f3bc2820 100644
--- a/flang/test/Lower/HLFIR/goto-do-body.f90
+++ b/flang/test/Lower/HLFIR/goto-do-body.f90
@@ -83,8 +83,8 @@ subroutine sub2()
do i = 1, 2, 3 * j - 8
! CHECK: %[[TMP2:.*]] = fir.load %[[J]]#0 : !fir.ref<i32>
-! CHECK: %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] : i32
-! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] : i32
+! CHECK: %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] overflow<nsw> : i32
+! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] overflow<nsw> : i32
! CHECK: fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
! CHECK: %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[C_7]] : i32
! CHECK: %[[TMP5:.*]] = arith.divsi %[[TMP4]], %[[STEP]] : i32
diff --git a/flang/test/Lower/goto-do-body.f90 b/flang/test/Lower/goto-do-body.f90
index 910e55f1839fd2..43afee1040914a 100644
--- a/flang/test/Lower/goto-do-body.f90
+++ b/flang/test/Lower/goto-do-body.f90
@@ -90,9 +90,9 @@ subroutine sub2()
! CHECK: %[[C2_2:.*]] = arith.constant 2 : i32
! CHECK: %[[C3_2:.*]] = arith.constant 3 : i32
! CHECK: %[[TMP2:.*]] = fir.load %[[J]] : !fir.ref<i32>
-! CHECK: %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] : i32
+! CHECK: %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] overflow<nsw> : i32
! CHECK: %[[C8_1:.*]] = arith.constant 8 : i32
-! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] : i32
+! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] overflow<nsw> : i32
! CHECK: fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
! CHECK: %[[TMP4:.*]] = arith.subi %[[C2_2]], %[[C1_1]] : i32
! CHECK: %[[TMP5:.*]] = arith.addi %[[TMP4]], %[[STEP]] : i32
diff --git a/flang/test/Lower/nsw.f90 b/flang/test/Lower/nsw.f90
index 84435b71330427..4ee9e5da829e61 100644
--- a/flang/test/Lower/nsw.f90
+++ b/flang/test/Lower/nsw.f90
@@ -59,3 +59,98 @@ subroutine bitwise_comparison(a, b)
! CHECK-LABEL: func.func @_QPbitwise_comparison(
! CHECK-NOT: overflow<nsw>
! CHECK: return
+
+subroutine loop_params(a,lb,ub,st)
+ integer :: i, lb, ub, st
+ integer :: a(lb:ub)
+ do i = lb+1, ub-1, st*2
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPloop_params(
+! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_14]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i32) -> index
+! CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = arith.muli %[[VAL_29]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> index
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
+! CHECK: %[[VAL_33:.*]]:2 = fir.do_loop %[[VAL_34:.*]] = %[[VAL_26]] to %[[VAL_28]] step %[[VAL_31]] iter_args(%[[VAL_35:.*]] = %[[VAL_32]]) -> (index, i32) {
+
+subroutine loop_params2(a,lb,ub,st)
+ integer :: i, lb, ub, st
+ integer :: a(lb:ub)
+ real :: ii
+ do ii = lb+1, ub-1, st*2
+ i = ii
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPloop_params2(
+! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_8:.*]] = fir.alloca index
+! CHECK: %[[VAL_9:.*]] = fir.alloca f32
+! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.declare %{{.*}}ii"} : (!fir.ref<f32>) -> !fir.ref<f32>
+! CHECK: %[[VAL_17:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> f32
+! CHECK: %[[VAL_31:.*]] = arith.subi %[[VAL_20]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> f32
+! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK: %[[VAL_34:.*]] = arith.muli %[[VAL_33]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_34]] : (i32) -> f32
+! CHECK: fir.store %[[VAL_35]] to %[[VAL_9]] : !fir.ref<f32>
+! CHECK: %[[VAL_36:.*]] = arith.subf %[[VAL_32]], %[[VAL_30]] fastmath<contract> : f32
+! CHECK: %[[VAL_37:.*]] = arith.addf %[[VAL_36]], %[[VAL_35]] fastmath<contract> : f32
+! CHECK: %[[VAL_38:.*]] = arith.divf %[[VAL_37]], %[[VAL_35]] fastmath<contract> : f32
+! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (f32) -> index
+! CHECK: fir.store %[[VAL_39]] to %[[VAL_8]] : !fir.ref<index>
+! CHECK: fir.store %[[VAL_30]] to %[[VAL_16]] : !fir.ref<f32>
+! CHECK: cf.br ^bb1
+! CHECK: ^bb1:
+! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_8]] : !fir.ref<index>
+! CHECK: %[[VAL_41:.*]] = arith.cmpi sgt, %[[VAL_40]], %[[VAL_6]] : index
+! CHECK: cf.cond_br %[[VAL_41]], ^bb2, ^bb3
+! CHECK: ^bb2:
+
+subroutine loop_params3(a,lb,ub,st)
+ integer :: i, lb, ub, st
+ integer :: a(lb:ub)
+ do concurrent (i=lb+1:ub-1:st*2)
+ a(i) = i
+ end do
+end subroutine
+! CHECK-LABEL: func.func @_QPloop_params3(
+! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
+! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
+! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
+! CHECK: %[[VAL_29:.*]] = arith.subi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
+! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> index
+! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]] : !fir.ref<i32>
+! CHECK: %[[VAL_32:.*]] = arith.muli %[[VAL_31]], %[[VAL_4]] overflow<nsw> : i32
+! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> index
+! CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_28]] to %[[VAL_30]] step %[[VAL_33]] unordered {
More information about the flang-commits
mailing list