[flang-commits] [clang] [flang] [flang] Add nsw flag to do-variable increment with a new option (PR #91579)

Tue May 14 00:57:14 PDT 2024

https://github.com/yus3710-fj updated https://github.com/llvm/llvm-project/pull/91579

>From f51cfbe1e50c7a1aa902c684f12a20d0fac39c21 Mon Sep 17 00:00:00 2001
From: Yusuke MINATO <minato.yusuke at fujitsu.com>
Date: Wed, 24 Apr 2024 14:42:21 +0900
Subject: [PATCH 1/3] [flang] Add nsw flag to do-variable increment with a new
 option

This patch adds the nsw flag to the increment of do-variables when
the new option -flang-experimental-integer-overflow is enabled.
NOTE 11.10 in the Fortran 2018 standard says that do-variables never overflow.

See also the discussion in #74709 and
the related Discourse post.
---
 clang/include/clang/Driver/Options.td         |   4 +
 clang/lib/Driver/ToolChains/Flang.cpp         |   1 +
 flang/include/flang/Lower/LoweringOptions.def |   4 +
 .../flang/Optimizer/Transforms/Passes.h       |   4 +-
 .../flang/Optimizer/Transforms/Passes.td      |   5 +-
 flang/include/flang/Tools/CLOptions.inc       |  13 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |   1 +
 flang/lib/Frontend/CompilerInvocation.cpp     |   6 +
 flang/lib/Frontend/FrontendActions.cpp        |   3 +
 flang/lib/Lower/Bridge.cpp                    |  12 +-
 flang/lib/Lower/IO.cpp                        |   9 +-
 .../Transforms/ControlFlowConverter.cpp       |  44 +++-
 flang/test/Driver/frontend-forwarding.f90     |   2 +
 flang/test/Fir/loop01.fir                     | 211 ++++++++++++++++++
 flang/test/Lower/array-substring.f90          |  40 ++++
 flang/test/Lower/do_loop.f90                  |  42 ++++
 flang/test/Lower/do_loop_unstructured.f90     | 189 +++++++++++++++-
 flang/test/Lower/infinite_loop.f90            |  34 +++
 flang/test/Lower/io-implied-do-fixes.f90      |  51 ++++-
 flang/tools/bbc/bbc.cpp                       |   7 +
 20 files changed, 659 insertions(+), 23 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1429528975853..0d032076f7163 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6539,6 +6539,10 @@ def flang_deprecated_no_hlfir : Flag<["-"], "flang-deprecated-no-hlfir">,
   Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>,
   HelpText<"Do not use HLFIR lowering (deprecated)">;
 
+def flang_experimental_integer_overflow : Flag<["-"], "flang-experimental-integer-overflow">,
+  Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>,
+  HelpText<"Add nsw flag to internal operations such as do-variable increment (experimental)">;
+
 //===----------------------------------------------------------------------===//
 // FLangOption + CoreOption + NoXarchOption
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 436a9c418a5f9..0ae489a823078 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -139,6 +139,7 @@ void Flang::addCodegenOptions(const ArgList &Args,
 
   Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
                             options::OPT_flang_deprecated_no_hlfir,
+                            options::OPT_flang_experimental_integer_overflow,
                             options::OPT_fno_ppc_native_vec_elem_order,
                             options::OPT_fppc_native_vec_elem_order});
 }
diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def
index be080a4d29d73..839d2b46249b0 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -34,5 +34,9 @@ ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0)
 /// On by default.
 ENUM_LOWERINGOPT(Underscoring, unsigned, 1, 1)
 
+/// If true, add nsw flags to integer arithmetic operations.
+/// Off by default.
+ENUM_LOWERINGOPT(NoSignedWrap, unsigned, 1, 0)
+
 #undef LOWERINGOPT
 #undef ENUM_LOWERINGOPT
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 470ed8a125ac4..496201a04e29c 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -54,6 +54,7 @@ namespace fir {
 std::unique_ptr<mlir::Pass> createAffineDemotionPass();
 std::unique_ptr<mlir::Pass>
 createArrayValueCopyPass(fir::ArrayValueCopyOptions options = {});
+std::unique_ptr<mlir::Pass> createCFGConversionPassWithNSW();
 std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
 std::unique_ptr<mlir::Pass>
 createExternalNameConversionPass(bool appendUnderscore);
@@ -89,7 +90,8 @@ createFunctionAttrPass(FunctionAttrTypes &functionAttr, bool noInfsFPMath,
                        bool noSignedZerosFPMath, bool unsafeFPMath);
 
 void populateCfgConversionRewrites(mlir::RewritePatternSet &patterns,
-                                   bool forceLoopToExecuteOnce = false);
+                                   bool forceLoopToExecuteOnce = false,
+                                   bool setNSW = false);
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 1eaaa32a508a0..ecbf8d5577b04 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -151,7 +151,10 @@ def CFGConversion : Pass<"cfg-conversion"> {
   let options = [
     Option<"forceLoopToExecuteOnce", "always-execute-loop-body", "bool",
            /*default=*/"false",
-           "force the body of a loop to execute at least once">
+           "force the body of a loop to execute at least once">,
+    Option<"setNSW", "set-nsw", "bool",
+           /*default=*/"false",
+           "set nsw on loop variable increment">
   ];
 }
 
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 79a2a4f63cfcf..5ad7df714d348 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -148,9 +148,14 @@ static void addCanonicalizerPassWithoutRegionSimplification(
   pm.addPass(mlir::createCanonicalizerPass(config));
 }
 
-inline void addCfgConversionPass(mlir::PassManager &pm) {
-  addNestedPassToAllTopLevelOperationsConditionally(
-      pm, disableCfgConversion, fir::createCFGConversion);
+inline void addCfgConversionPass(
+    mlir::PassManager &pm, const MLIRToLLVMPassPipelineConfig &config) {
+  if (config.NoSignedWrap)
+    addNestedPassToAllTopLevelOperationsConditionally(
+        pm, disableCfgConversion, fir::createCFGConversionPassWithNSW);
+  else
+    addNestedPassToAllTopLevelOperationsConditionally(
+        pm, disableCfgConversion, fir::createCFGConversion);
 }
 
 inline void addAVC(
@@ -290,7 +295,7 @@ inline void createDefaultFIROptimizerPassPipeline(
     pm.addPass(fir::createAliasTagsPass());
 
   // convert control flow to CFG form
-  fir::addCfgConversionPass(pm);
+  fir::addCfgConversionPass(pm, pc);
   pm.addPass(mlir::createConvertSCFToCFPass());
 
   pm.addPass(mlir::createCanonicalizerPass(config));
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index f79520707714d..583daa30289d6 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -122,6 +122,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
   bool NoSignedZerosFPMath =
       false; ///< Set no-signed-zeros-fp-math attribute for functions.
   bool UnsafeFPMath = false; ///< Set unsafe-fp-math attribute for functions.
+  bool NoSignedWrap = false; ///< Add nsw flag to numeric operations.
 };
 
 struct OffloadModuleOpts {
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 4318286e74152..f72f181bade9c 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1203,6 +1203,12 @@ bool CompilerInvocation::createFromArgs(
     invoc.loweringOpts.setNoPPCNativeVecElemOrder(true);
   }
 
+  // -flang-experimental-integer-overflow
+  if (args.hasArg(
+          clang::driver::options::OPT_flang_experimental_integer_overflow)) {
+    invoc.loweringOpts.setNoSignedWrap(true);
+  }
+
   // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or
   // -Rpass-analysis. This will be used later when processing and outputting the
   // remarks generated by LLVM in ExecuteCompilerInvocation.cpp.
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 2f65ab6102f4d..deced43462607 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -809,6 +809,9 @@ void CodeGenAction::generateLLVMIR() {
     config.VScaleMax = vsr->second;
   }
 
+  if (ci.getInvocation().getLoweringOpts().getNoSignedWrap())
+    config.NoSignedWrap = true;
+
   // Create the pass pipeline
   fir::createMLIRToLLVMPassPipeline(pm, config, getCurrentFile());
   (void)mlir::applyPassManagerCLOptions(pm);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 4902886712e92..58460a9cfb6cf 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2007,6 +2007,11 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   void genFIRIncrementLoopEnd(IncrementLoopNestInfo &incrementLoopNestInfo) {
     assert(!incrementLoopNestInfo.empty() && "empty loop nest");
     mlir::Location loc = toLocation();
+    mlir::arith::IntegerOverflowFlags flags{};
+    if (getLoweringOptions().getNoSignedWrap())
+      flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
+    auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(
+        builder->getContext(), flags);
     for (auto it = incrementLoopNestInfo.rbegin(),
               rend = incrementLoopNestInfo.rend();
          it != rend; ++it) {
@@ -2021,7 +2026,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         builder->setInsertionPointToEnd(info.doLoop.getBody());
         llvm::SmallVector<mlir::Value, 2> results;
         results.push_back(builder->create<mlir::arith::AddIOp>(
-            loc, info.doLoop.getInductionVar(), info.doLoop.getStep()));
+            loc, info.doLoop.getInductionVar(), info.doLoop.getStep(),
+            iofAttr));
         // Step loopVariable to help optimizations such as vectorization.
         // Induction variable elimination will clean up as necessary.
         mlir::Value step = builder->createConvert(
@@ -2029,7 +2035,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         mlir::Value loopVar =
             builder->create<fir::LoadOp>(loc, info.loopVariable);
         results.push_back(
-            builder->create<mlir::arith::AddIOp>(loc, loopVar, step));
+            builder->create<mlir::arith::AddIOp>(loc, loopVar, step, iofAttr));
         builder->create<fir::ResultOp>(loc, results);
         builder->setInsertionPointAfter(info.doLoop);
         // The loop control variable may be used after the loop.
@@ -2054,7 +2060,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       if (info.hasRealControl)
         value = builder->create<mlir::arith::AddFOp>(loc, value, step);
       else
-        value = builder->create<mlir::arith::AddIOp>(loc, value, step);
+        value = builder->create<mlir::arith::AddIOp>(loc, value, step, iofAttr);
       builder->create<fir::StoreOp>(loc, value, info.loopVariable);
 
       genBranch(info.headerBlock);
diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp
index ed0afad9197df..cc0347a4771eb 100644
--- a/flang/lib/Lower/IO.cpp
+++ b/flang/lib/Lower/IO.cpp
@@ -928,6 +928,11 @@ static void genIoLoop(Fortran::lower::AbstractConverter &converter,
   Fortran::lower::StatementContext stmtCtx;
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Location loc = converter.getCurrentLocation();
+  mlir::arith::IntegerOverflowFlags flags{};
+  if (converter.getLoweringOptions().getNoSignedWrap())
+    flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
+  auto iofAttr =
+      mlir::arith::IntegerOverflowFlagsAttr::get(builder.getContext(), flags);
   makeNextConditionalOn(builder, loc, checkResult, ok, inLoop);
   const auto &itemList = std::get<0>(ioImpliedDo.t);
   const auto &control = std::get<1>(ioImpliedDo.t);
@@ -965,7 +970,7 @@ static void genIoLoop(Fortran::lower::AbstractConverter &converter,
     genItemList(ioImpliedDo);
     builder.setInsertionPointToEnd(doLoopOp.getBody());
     mlir::Value result = builder.create<mlir::arith::AddIOp>(
-        loc, doLoopOp.getInductionVar(), doLoopOp.getStep());
+        loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr);
     builder.create<fir::ResultOp>(loc, result);
     builder.setInsertionPointAfter(doLoopOp);
     // The loop control variable may be used after the loop.
@@ -1007,7 +1012,7 @@ static void genIoLoop(Fortran::lower::AbstractConverter &converter,
   mlir::OpResult iterateResult = builder.getBlock()->back().getResult(0);
   mlir::Value inductionResult0 = iterWhileOp.getInductionVar();
   auto inductionResult1 = builder.create<mlir::arith::AddIOp>(
-      loc, inductionResult0, iterWhileOp.getStep());
+      loc, inductionResult0, iterWhileOp.getStep(), iofAttr);
   auto inductionResult = builder.create<mlir::arith::SelectOp>(
       loc, iterateResult, inductionResult1, inductionResult0);
   llvm::SmallVector<mlir::Value> results = {inductionResult, iterateResult};
diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
index a62f6cde0e09b..a233e7fbdcd1e 100644
--- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
+++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
@@ -43,14 +43,19 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
 public:
   using OpRewritePattern::OpRewritePattern;
 
-  CfgLoopConv(mlir::MLIRContext *ctx, bool forceLoopToExecuteOnce)
+  CfgLoopConv(mlir::MLIRContext *ctx, bool forceLoopToExecuteOnce, bool setNSW)
       : mlir::OpRewritePattern<fir::DoLoopOp>(ctx),
-        forceLoopToExecuteOnce(forceLoopToExecuteOnce) {}
+        forceLoopToExecuteOnce(forceLoopToExecuteOnce), setNSW(setNSW) {}
 
   mlir::LogicalResult
   matchAndRewrite(DoLoopOp loop,
                   mlir::PatternRewriter &rewriter) const override {
     auto loc = loop.getLoc();
+    mlir::arith::IntegerOverflowFlags flags{};
+    if (setNSW)
+      flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
+    auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(
+        rewriter.getContext(), flags);
 
     // Create the start and end blocks that will wrap the DoLoopOp with an
     // initalizer and an end point
@@ -104,7 +109,7 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
     rewriter.setInsertionPointToEnd(lastBlock);
     auto iv = conditionalBlock->getArgument(0);
     mlir::Value steppedIndex =
-        rewriter.create<mlir::arith::AddIOp>(loc, iv, step);
+        rewriter.create<mlir::arith::AddIOp>(loc, iv, step, iofAttr);
     assert(steppedIndex && "must be a Value");
     auto lastArg = conditionalBlock->getNumArguments() - 1;
     auto itersLeft = conditionalBlock->getArgument(lastArg);
@@ -142,6 +147,7 @@ class CfgLoopConv : public mlir::OpRewritePattern<fir::DoLoopOp> {
 
 private:
   bool forceLoopToExecuteOnce;
+  bool setNSW;
 };
 
 /// Convert `fir.if` to control-flow
@@ -149,7 +155,7 @@ class CfgIfConv : public mlir::OpRewritePattern<fir::IfOp> {
 public:
   using OpRewritePattern::OpRewritePattern;
 
-  CfgIfConv(mlir::MLIRContext *ctx, bool forceLoopToExecuteOnce)
+  CfgIfConv(mlir::MLIRContext *ctx, bool forceLoopToExecuteOnce, bool setNSW)
       : mlir::OpRewritePattern<fir::IfOp>(ctx) {}
 
   mlir::LogicalResult
@@ -214,13 +220,19 @@ class CfgIterWhileConv : public mlir::OpRewritePattern<fir::IterWhileOp> {
 public:
   using OpRewritePattern::OpRewritePattern;
 
-  CfgIterWhileConv(mlir::MLIRContext *ctx, bool forceLoopToExecuteOnce)
-      : mlir::OpRewritePattern<fir::IterWhileOp>(ctx) {}
+  CfgIterWhileConv(mlir::MLIRContext *ctx, bool forceLoopToExecuteOnce,
+                   bool setNSW)
+      : mlir::OpRewritePattern<fir::IterWhileOp>(ctx), setNSW(setNSW) {}
 
   mlir::LogicalResult
   matchAndRewrite(fir::IterWhileOp whileOp,
                   mlir::PatternRewriter &rewriter) const override {
     auto loc = whileOp.getLoc();
+    mlir::arith::IntegerOverflowFlags flags{};
+    if (setNSW)
+      flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
+    auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(
+        rewriter.getContext(), flags);
 
     // Start by splitting the block containing the 'fir.do_loop' into two parts.
     // The part before will get the init code, the part after will be the end
@@ -248,7 +260,8 @@ class CfgIterWhileConv : public mlir::OpRewritePattern<fir::IterWhileOp> {
     auto *terminator = lastBodyBlock->getTerminator();
     rewriter.setInsertionPointToEnd(lastBodyBlock);
     auto step = whileOp.getStep();
-    mlir::Value stepped = rewriter.create<mlir::arith::AddIOp>(loc, iv, step);
+    mlir::Value stepped =
+        rewriter.create<mlir::arith::AddIOp>(loc, iv, step, iofAttr);
     assert(stepped && "must be a Value");
 
     llvm::SmallVector<mlir::Value> loopCarried;
@@ -305,6 +318,9 @@ class CfgIterWhileConv : public mlir::OpRewritePattern<fir::IterWhileOp> {
     rewriter.replaceOp(whileOp, args);
     return success();
   }
+
+private:
+  bool setNSW;
 };
 
 /// Convert FIR structured control flow ops to CFG ops.
@@ -312,10 +328,13 @@ class CfgConversion : public fir::impl::CFGConversionBase<CfgConversion> {
 public:
   using CFGConversionBase<CfgConversion>::CFGConversionBase;
 
+  CfgConversion(bool setNSW) { this->setNSW = setNSW; }
+
   void runOnOperation() override {
     auto *context = &this->getContext();
     mlir::RewritePatternSet patterns(context);
-    fir::populateCfgConversionRewrites(patterns, this->forceLoopToExecuteOnce);
+    fir::populateCfgConversionRewrites(patterns, this->forceLoopToExecuteOnce,
+                                       this->setNSW);
     mlir::ConversionTarget target(*context);
     target.addLegalDialect<mlir::affine::AffineDialect,
                            mlir::cf::ControlFlowDialect, FIROpsDialect,
@@ -337,7 +356,12 @@ class CfgConversion : public fir::impl::CFGConversionBase<CfgConversion> {
 
 /// Expose conversion rewriters to other passes
 void fir::populateCfgConversionRewrites(mlir::RewritePatternSet &patterns,
-                                        bool forceLoopToExecuteOnce) {
+                                        bool forceLoopToExecuteOnce,
+                                        bool setNSW) {
   patterns.insert<CfgLoopConv, CfgIfConv, CfgIterWhileConv>(
-      patterns.getContext(), forceLoopToExecuteOnce);
+      patterns.getContext(), forceLoopToExecuteOnce, setNSW);
+}
+
+std::unique_ptr<mlir::Pass> fir::createCFGConversionPassWithNSW() {
+  return std::make_unique<CfgConversion>(true);
 }
diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90
index eac9773ce25c7..35adb47b56861 100644
--- a/flang/test/Driver/frontend-forwarding.f90
+++ b/flang/test/Driver/frontend-forwarding.f90
@@ -19,6 +19,7 @@
 ! RUN:     -fversion-loops-for-stride \
 ! RUN:     -flang-experimental-hlfir \
 ! RUN:     -flang-deprecated-no-hlfir \
+! RUN:     -flang-experimental-integer-overflow \
 ! RUN:     -fno-ppc-native-vector-element-order \
 ! RUN:     -fppc-native-vector-element-order \
 ! RUN:     -mllvm -print-before-all \
@@ -50,6 +51,7 @@
 ! CHECK: "-fversion-loops-for-stride"
 ! CHECK: "-flang-experimental-hlfir"
 ! CHECK: "-flang-deprecated-no-hlfir"
+! CHECK: "-flang-experimental-integer-overflow"
 ! CHECK: "-fno-ppc-native-vector-element-order"
 ! CHECK: "-fppc-native-vector-element-order"
 ! CHECK: "-Rpass"
diff --git a/flang/test/Fir/loop01.fir b/flang/test/Fir/loop01.fir
index 72ca1c3989e45..c1cbb522c378c 100644
--- a/flang/test/Fir/loop01.fir
+++ b/flang/test/Fir/loop01.fir
@@ -1,4 +1,5 @@
 // RUN: fir-opt --split-input-file --cfg-conversion %s | FileCheck %s
+// RUN: fir-opt --split-input-file --cfg-conversion="set-nsw=true" %s | FileCheck %s --check-prefix=NSW
 
 func.func @x(%lb : index, %ub : index, %step : index, %b : i1, %addr : !fir.ref<index>) {
   fir.do_loop %iv = %lb to %ub step %step unordered {
@@ -43,6 +44,34 @@ func.func private @f2() -> i1
 // CHECK:     }
 // CHECK:     func private @f2() -> i1
 
+// NSW:     func @x(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index, %[[VAL_3:.*]]: i1, %[[VAL_4:.*]]: !fir.ref<index>) {
+// NSW:       %[[VAL_5:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:       %[[VAL_6:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] : index
+// NSW:       %[[VAL_7:.*]] = arith.divsi %[[VAL_6]], %[[VAL_2]] : index
+// NSW:       br ^bb1(%[[VAL_0]], %[[VAL_7]] : index, index)
+// NSW:     ^bb1(%[[VAL_8:.*]]: index, %[[VAL_9:.*]]: index):
+// NSW:       %[[VAL_10:.*]] = arith.constant 0 : index
+// NSW:       %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index
+// NSW:       cond_br %[[VAL_11]], ^bb2, ^bb6
+// NSW:     ^bb2:
+// NSW:       cond_br %[[VAL_3]], ^bb3, ^bb4
+// NSW:     ^bb3:
+// NSW:       fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref<index>
+// NSW:       br ^bb5
+// NSW:     ^bb4:
+// NSW:       %[[VAL_12:.*]] = arith.constant 0 : index
+// NSW:       fir.store %[[VAL_12]] to %[[VAL_4]] : !fir.ref<index>
+// NSW:       br ^bb5
+// NSW:     ^bb5:
+// NSW:       %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] overflow<nsw> : index
+// NSW:       %[[VAL_14:.*]] = arith.constant 1 : index
+// NSW:       %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index
+// NSW:       br ^bb1(%[[VAL_13]], %[[VAL_15]] : index, index)
+// NSW:     ^bb6:
+// NSW:       return
+// NSW:     }
+// NSW:     func private @f2() -> i1
+
 // -----
 
 func.func @x2(%lo : index, %up : index, %ok : i1) {
@@ -79,6 +108,29 @@ func.func private @f3(i16)
 // CHECK:   }
 // CHECK:   func private @f3(i16)
 
+// NSW:   func @x2(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: i1) {
+// NSW:     %[[VAL_3:.*]] = arith.constant 1 : index
+// NSW:     br ^bb1(%[[VAL_0]], %[[VAL_2]] : index, i1)
+// NSW:   ^bb1(%[[VAL_4:.*]]: index, %[[VAL_5:.*]]: i1):
+// NSW:     %[[VAL_6:.*]] = arith.constant 0 : index
+// NSW:     %[[VAL_7:.*]] = arith.cmpi slt, %[[VAL_6]], %[[VAL_3]] : index
+// NSW:     %[[VAL_8:.*]] = arith.cmpi sle, %[[VAL_4]], %[[VAL_1]] : index
+// NSW:     %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_3]], %[[VAL_6]] : index
+// NSW:     %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_4]] : index
+// NSW:     %[[VAL_11:.*]] = arith.andi %[[VAL_7]], %[[VAL_8]] : i1
+// NSW:     %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:     %[[VAL_13:.*]] = arith.ori %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:     %[[VAL_14:.*]] = arith.andi %[[VAL_5]], %[[VAL_13]] : i1
+// NSW:     cond_br %[[VAL_14]], ^bb2, ^bb3
+// NSW:   ^bb2:
+// NSW:     %[[VAL_15:.*]] = fir.call @f2() : () -> i1
+// NSW:     %[[VAL_16:.*]] = arith.addi %[[VAL_4]], %[[VAL_3]] overflow<nsw> : index
+// NSW:     br ^bb1(%[[VAL_16]], %[[VAL_15]] : index, i1)
+// NSW:   ^bb3:
+// NSW:     return
+// NSW:   }
+// NSW:   func private @f3(i16)
+
 // -----
 
 // do_loop with an extra loop-carried value
@@ -115,6 +167,29 @@ func.func @x3(%lo : index, %up : index) -> i1 {
 // CHECK:           return %[[VAL_8]] : i1
 // CHECK:         }
 
+// NSW-LABEL:   func @x3(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> i1 {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant true
+// NSW:           %[[VAL_4:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:           %[[VAL_5:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index
+// NSW:           %[[VAL_6:.*]] = arith.divsi %[[VAL_5]], %[[VAL_2]] : index
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_6]] : index, i1, index)
+// NSW:         ^bb1(%[[VAL_7:.*]]: index, %[[VAL_8:.*]]: i1, %[[VAL_9:.*]]: index):
+// NSW:           %[[VAL_10:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index
+// NSW:           cond_br %[[VAL_11]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_12:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_13:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           %[[VAL_14:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index
+// NSW:           br ^bb1(%[[VAL_13]], %[[VAL_12]], %[[VAL_15]] : index, i1, index)
+// NSW:         ^bb3:
+// NSW:           return %[[VAL_8]] : i1
+// NSW:         }
+
 // -----
 
 // iterate_while with an extra loop-carried value
@@ -160,6 +235,34 @@ func.func private @f4(i32) -> i1
 // CHECK:         }
 // CHECK:         func private @f4(i32) -> i1
 
+// NSW-LABEL:   func @y3(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> i1 {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant true
+// NSW:           %[[VAL_4:.*]] = fir.call @f2() : () -> i1
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_4]] : index, i1, i1)
+// NSW:         ^bb1(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: i1):
+// NSW:           %[[VAL_8:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_8]], %[[VAL_2]] : index
+// NSW:           %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_5]], %[[VAL_1]] : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_8]] : index
+// NSW:           %[[VAL_12:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_5]] : index
+// NSW:           %[[VAL_13:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:           %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:           %[[VAL_15:.*]] = arith.ori %[[VAL_13]], %[[VAL_14]] : i1
+// NSW:           %[[VAL_16:.*]] = arith.andi %[[VAL_6]], %[[VAL_15]] : i1
+// NSW:           cond_br %[[VAL_16]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_17:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_18:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           br ^bb1(%[[VAL_18]], %[[VAL_6]], %[[VAL_17]] : index, i1, i1)
+// NSW:         ^bb3:
+// NSW:           %[[VAL_19:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1
+// NSW:           return %[[VAL_19]] : i1
+// NSW:         }
+// NSW:         func private @f4(i32) -> i1
+
 // -----
 
 // do_loop that returns the final value of the induction
@@ -196,6 +299,29 @@ func.func @x4(%lo : index, %up : index) -> index {
 // CHECK:           return %[[VAL_6]] : index
 // CHECK:         }
 
+// NSW-LABEL:   func @x4(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:           %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_2]] : index
+// NSW:           %[[VAL_5:.*]] = arith.divsi %[[VAL_4]], %[[VAL_2]] : index
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_5]] : index, index)
+// NSW:         ^bb1(%[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index):
+// NSW:           %[[VAL_8:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_7]], %[[VAL_8]] : index
+// NSW:           cond_br %[[VAL_9]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_10:.*]] = fir.convert %[[VAL_6]] : (index) -> i32
+// NSW:           %[[VAL_11:.*]] = fir.call @f4(%[[VAL_10]]) : (i32) -> i1
+// NSW:           %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           %[[VAL_13:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_14:.*]] = arith.subi %[[VAL_7]], %[[VAL_13]] : index
+// NSW:           br ^bb1(%[[VAL_12]], %[[VAL_14]] : index, index)
+// NSW:         ^bb3:
+// NSW:           return %[[VAL_6]] : index
+// NSW:         }
+
 // -----
 
 // iterate_while that returns the final value of both inductions
@@ -236,6 +362,32 @@ func.func @y4(%lo : index, %up : index) -> index {
 // CHECK:           return %[[VAL_4]] : index
 // CHECK:         }
 
+// NSW-LABEL:   func @y4(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant true
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]] : index, i1)
+// NSW:         ^bb1(%[[VAL_4:.*]]: index, %[[VAL_5:.*]]: i1):
+// NSW:           %[[VAL_6:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_7:.*]] = arith.cmpi slt, %[[VAL_6]], %[[VAL_2]] : index
+// NSW:           %[[VAL_8:.*]] = arith.cmpi sle, %[[VAL_4]], %[[VAL_1]] : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_6]] : index
+// NSW:           %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_4]] : index
+// NSW:           %[[VAL_11:.*]] = arith.andi %[[VAL_7]], %[[VAL_8]] : i1
+// NSW:           %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:           %[[VAL_13:.*]] = arith.ori %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:           %[[VAL_14:.*]] = arith.andi %[[VAL_5]], %[[VAL_13]] : i1
+// NSW:           cond_br %[[VAL_14]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_15:.*]] = fir.convert %[[VAL_4]] : (index) -> i32
+// NSW:           %[[VAL_16:.*]] = fir.call @f4(%[[VAL_15]]) : (i32) -> i1
+// NSW:           %[[VAL_17:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           br ^bb1(%[[VAL_17]], %[[VAL_16]] : index, i1)
+// NSW:         ^bb3:
+// NSW:           return %[[VAL_4]] : index
+// NSW:         }
+
 // -----
 
 // do_loop that returns the final induction value
@@ -277,6 +429,31 @@ func.func @x5(%lo : index, %up : index) -> index {
 // CHECK:           return %[[VAL_7]] : index
 // CHECK:         }
 
+// NSW-LABEL:   func @x5(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant 42 : i16
+// NSW:           %[[VAL_4:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:           %[[VAL_5:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index
+// NSW:           %[[VAL_6:.*]] = arith.divsi %[[VAL_5]], %[[VAL_2]] : index
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_6]] : index, i16, index)
+// NSW:         ^bb1(%[[VAL_7:.*]]: index, %[[VAL_8:.*]]: i16, %[[VAL_9:.*]]: index):
+// NSW:           %[[VAL_10:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index
+// NSW:           cond_br %[[VAL_11]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_12:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> i16
+// NSW:           %[[VAL_14:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           %[[VAL_15:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_16:.*]] = arith.subi %[[VAL_9]], %[[VAL_15]] : index
+// NSW:           br ^bb1(%[[VAL_14]], %[[VAL_13]], %[[VAL_16]] : index, i16, index)
+// NSW:         ^bb3:
+// NSW:           fir.call @f3(%[[VAL_8]]) : (i16) -> ()
+// NSW:           return %[[VAL_7]] : index
+// NSW:         }
+
 // -----
 
 // iterate_while that returns the both induction values
@@ -331,3 +508,37 @@ func.func @y5(%lo : index, %up : index) -> index {
 // CHECK:           fir.call @f3(%[[VAL_7]]) : (i16) -> ()
 // CHECK:           return %[[VAL_5]] : index
 // CHECK:         }
+
+// NSW-LABEL:   func @y5(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant 42 : i16
+// NSW:           %[[VAL_4:.*]] = arith.constant true
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_4]], %[[VAL_3]] : index, i1, i16)
+// NSW:         ^bb1(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: i16):
+// NSW:           %[[VAL_8:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_8]], %[[VAL_2]] : index
+// NSW:           %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_5]], %[[VAL_1]] : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_8]] : index
+// NSW:           %[[VAL_12:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_5]] : index
+// NSW:           %[[VAL_13:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:           %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:           %[[VAL_15:.*]] = arith.ori %[[VAL_13]], %[[VAL_14]] : i1
+// NSW:           %[[VAL_16:.*]] = arith.andi %[[VAL_6]], %[[VAL_15]] : i1
+// NSW:           cond_br %[[VAL_16]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_17:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> i16
+// NSW:           %[[VAL_19:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           br ^bb1(%[[VAL_19]], %[[VAL_17]], %[[VAL_18]] : index, i1, i16)
+// NSW:         ^bb3:
+// NSW:           cond_br %[[VAL_6]], ^bb4, ^bb5
+// NSW:         ^bb4:
+// NSW:           %[[VAL_20:.*]] = arith.constant 0 : i32
+// NSW:           %[[VAL_21:.*]] = fir.call @f4(%[[VAL_20]]) : (i32) -> i1
+// NSW:           br ^bb5
+// NSW:         ^bb5:
+// NSW:           fir.call @f3(%[[VAL_7]]) : (i16) -> ()
+// NSW:           return %[[VAL_5]] : index
+// NSW:         }
diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90
index 421c4b28ac8f8..2e283997e3e00 100644
--- a/flang/test/Lower/array-substring.f90
+++ b/flang/test/Lower/array-substring.f90
@@ -1,4 +1,5 @@
 ! RUN: bbc -hlfir=false %s -o - | FileCheck %s
+! RUN: bbc -hlfir=false -integer-overflow %s -o - | FileCheck %s --check-prefix=NSW
 
 ! CHECK-LABEL: func @_QPtest(
 ! CHECK-SAME:     %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}) -> !fir.array<1x!fir.logical<4>> {
@@ -45,3 +46,42 @@ function test(C)
 
   test = C(1:1)(1:8) == (/'ABCDabcd'/) 
 end function test
+
+! NSW-LABEL: func @_QPtest(
+! NSW-SAME:     %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}) -> !fir.array<1x!fir.logical<4>> {
+! NSW-DAG:         %[[VAL_1:.*]] = arith.constant 1 : index
+! NSW-DAG:         %[[VAL_2:.*]] = arith.constant 0 : index
+! NSW-DAG:         %[[VAL_3:.*]] = arith.constant 0 : i32
+! NSW-DAG:         %[[VAL_4:.*]] = arith.constant 8 : index
+! NSW:         %[[VAL_6:.*]]:2 = fir.unboxchar %[[VAL_0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! NSW:         %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<1x!fir.char<1,12>>>
+! NSW:         %[[VAL_8:.*]] = fir.alloca !fir.array<1x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFtestEtest"}
+! NSW:         %[[VAL_9:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+! NSW:         %[[VAL_10:.*]] = fir.slice %[[VAL_1]], %[[VAL_1]], %[[VAL_1]] : (index, index, index) -> !fir.slice<1>
+! NSW:         %[[VAL_11:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref<!fir.array<1x!fir.char<1,8>>>
+! NSW:         br ^bb1(%[[VAL_2]], %[[VAL_1]] : index, index)
+! NSW:       ^bb1(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index):
+! NSW:         %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_2]] : index
+! NSW:         cond_br %[[VAL_14]], ^bb2, ^bb3
+! NSW:       ^bb2:
+! NSW:         %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] : index
+! NSW:         %[[VAL_16:.*]] = fir.array_coor %[[VAL_7]](%[[VAL_9]]) {{\[}}%[[VAL_10]]] %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.char<1,12>>>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref<!fir.char<1,12>>
+! NSW:         %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<!fir.char<1,12>>) -> !fir.ref<!fir.array<12x!fir.char<1>>>
+! NSW:         %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_17]], %[[VAL_2]] : (!fir.ref<!fir.array<12x!fir.char<1>>>, index) -> !fir.ref<!fir.char<1>>
+! NSW:         %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<!fir.char<1,?>>
+! NSW:         %[[VAL_20:.*]] = fir.array_coor %[[VAL_11]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.char<1,8>>>, !fir.shape<1>, index) -> !fir.ref<!fir.char<1,8>>
+! NSW:         %[[VAL_21:.*]] = fir.convert %[[VAL_19]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! NSW:         %[[VAL_22:.*]] = fir.convert %[[VAL_20]] : (!fir.ref<!fir.char<1,8>>) -> !fir.ref<i8>
+! NSW:         %[[VAL_23:.*]] = fir.convert %[[VAL_4]] : (index) -> i64
+! NSW:         %[[VAL_24:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_23]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
+! NSW:         %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_3]] : i32
+! NSW:         %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! NSW:         %[[VAL_27:.*]] = fir.array_coor %[[VAL_8]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
+! NSW:         fir.store %[[VAL_26]] to %[[VAL_27]] : !fir.ref<!fir.logical<4>>
+! NSW:         %[[VAL_15_NSW:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] overflow<nsw> : index
+! NSW:         %[[VAL_28:.*]] = arith.subi %[[VAL_13]], %[[VAL_1]] : index
+! NSW:         br ^bb1(%[[VAL_15_NSW]], %[[VAL_28]] : index, index)
+! NSW:       ^bb3:
+! NSW:         %[[VAL_29:.*]] = fir.load %[[VAL_8]] : !fir.ref<!fir.array<1x!fir.logical<4>>>
+! NSW:         return %[[VAL_29]] : !fir.array<1x!fir.logical<4>>
+! NSW:       }
diff --git a/flang/test/Lower/do_loop.f90 b/flang/test/Lower/do_loop.f90
index d9c83658ee25b..a46e6c947391b 100644
--- a/flang/test/Lower/do_loop.f90
+++ b/flang/test/Lower/do_loop.f90
@@ -1,5 +1,6 @@
 ! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -o - %s | FileCheck %s
 ! RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s
+! RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW
 
 ! Simple tests for structured ordered loops with loop-control.
 ! Tests the structure of the loop, storage to index variable and return and 
@@ -7,8 +8,10 @@
 
 ! Test a simple loop with the final value of the index variable read outside the loop
 ! CHECK-LABEL: simple_loop
+! NSW-LABEL:   simple_loop
 subroutine simple_loop
   ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_loopEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_loopEi"}
   integer :: i
 
   ! CHECK: %[[C1:.*]] = arith.constant 1 : i32
@@ -18,14 +21,18 @@ subroutine simple_loop
   ! CHECK: %[[C1:.*]] = arith.constant 1 : index
   ! CHECK: %[[LB:.*]] = fir.convert %[[C1_CVT]] : (index) -> i32
   ! CHECK: %[[LI_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[LI_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[C1_CVT]] to %[[C5_CVT]] step %[[C1]]
   ! CHECK-SAME: iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) {
   do i=1,5
   ! CHECK:   fir.store %[[IV]] to %[[I_REF]] : !fir.ref<i32>
   ! CHECK:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1]] : index
+  ! NSW:     %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1:.*]] overflow<nsw> : index
   ! CHECK:   %[[STEPCAST:.*]] = fir.convert %[[C1]] : (index) -> i32
   ! CHECK:   %[[IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+  ! NSW:     %[[IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
   ! CHECK:   %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] : i32
+  ! NSW:     %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST:.*]] overflow<nsw> : i32
   ! CHECK:  fir.result %[[LI_NEXT]], %[[IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -37,11 +44,14 @@ subroutine simple_loop
 
 ! Test a 2-nested loop with a body composed of a reduction. Values are read from a 2d array.
 ! CHECK-LABEL: nested_loop
+! NSW-LABEL:   nested_loop
 subroutine nested_loop
   ! CHECK: %[[ARR_REF:.*]] = fir.alloca !fir.array<5x5xi32> {bindc_name = "arr", uniq_name = "_QFnested_loopEarr"}
   ! CHECK: %[[ASUM_REF:.*]] = fir.alloca i32 {bindc_name = "asum", uniq_name = "_QFnested_loopEasum"}
   ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_loopEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_loopEi"}
   ! CHECK: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_loopEj"}
+  ! NSW:   %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_loopEj"}
   integer :: asum, arr(5,5)
   integer :: i, j
   asum = 0
@@ -52,6 +62,7 @@ subroutine nested_loop
   ! CHECK: %[[ST_I:.*]] = arith.constant 1 : index
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_I_CVT]] : (index) -> i32
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[S_I_CVT]] to %[[E_I_CVT]] step %[[ST_I]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do i=1,5
@@ -63,6 +74,7 @@ subroutine nested_loop
     ! CHECK: %[[ST_J:.*]] = arith.constant 1 : index
     ! CHECK: %[[J_LB:.*]] = fir.convert %[[S_J_CVT]] : (index) -> i32
     ! CHECK: %[[J_RES:.*]]:2 = fir.do_loop %[[LJ:[^ ]*]] =
+    ! NSW:   %[[J_RES:.*]]:2 = fir.do_loop %[[LJ:[^ ]*]] =
     ! CHECK-SAME: %[[S_J_CVT]] to %[[E_J_CVT]] step %[[ST_J]]
     ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) {
     do j=1,5
@@ -82,17 +94,23 @@ subroutine nested_loop
       ! CHECK: fir.store %[[ASUM_NEW]] to %[[ASUM_REF]] : !fir.ref<i32>
       asum = asum + arr(i,j)
       ! CHECK: %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J]] : index
+      ! NSW:   %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J:.*]] overflow<nsw> : index
       ! CHECK: %[[J_STEPCAST:.*]] = fir.convert %[[ST_J]] : (index) -> i32
       ! CHECK: %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
+      ! NSW:   %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
       ! CHECK: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] : i32
+      ! NSW:   %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST:.*]] overflow<nsw> : i32
       ! CHECK: fir.result %[[LJ_NEXT]], %[[J_IVINC]] : index, i32
     ! CHECK: }
     end do
     ! CHECK: fir.store %[[J_RES]]#1 to %[[J_REF]] : !fir.ref<i32>
     ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I]] : index
+    ! NSW:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I:.*]] overflow<nsw> : index
     ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_I]] : (index) -> i32
     ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+    ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
     ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+    ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
     ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -101,9 +119,11 @@ subroutine nested_loop
 
 ! Test a downcounting loop
 ! CHECK-LABEL: down_counting_loop
+! NSW-LABEL:   down_counting_loop
 subroutine down_counting_loop()
   integer :: i
   ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdown_counting_loopEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdown_counting_loopEi"}
 
   ! CHECK: %[[C5:.*]] = arith.constant 5 : i32
   ! CHECK: %[[C5_CVT:.*]] = fir.convert %[[C5]] : (i32) -> index
@@ -113,14 +133,18 @@ subroutine down_counting_loop()
   ! CHECK: %[[CMINUS1_STEP_CVT:.*]] = fir.convert %[[CMINUS1]] : (i32) -> index
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[C5_CVT]] : (index) -> i32
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[C5_CVT]] to %[[C1_CVT]] step %[[CMINUS1_STEP_CVT]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do i=5,1,-1
   ! CHECK: fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref<i32>
   ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT]] : index
+  ! NSW:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT:.*]] overflow<nsw> : index
   ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[CMINUS1_STEP_CVT]] : (index) -> i32
   ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+  ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
   ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+  ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
   ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -129,6 +153,7 @@ subroutine down_counting_loop()
 
 ! Test a general loop with a variable step
 ! CHECK-LABEL: loop_with_variable_step
+! NSW-LABEL:   loop_with_variable_step
 ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "s"}, %[[E_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "e"}, %[[ST_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "st"}) {
 subroutine loop_with_variable_step(s,e,st)
   integer :: s, e, st
@@ -141,14 +166,18 @@ subroutine loop_with_variable_step(s,e,st)
   ! CHECK: %[[ST_CVT:.*]] = fir.convert %[[ST]] : (i32) -> index
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i32
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do i=s,e,st
   ! CHECK:  fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref<i32>
   ! CHECK:  %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index
+  ! NSW:    %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow<nsw> : index
   ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i32
   ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+  ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
   ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+  ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
   ! CHECK:  fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -157,11 +186,13 @@ subroutine loop_with_variable_step(s,e,st)
 
 ! Test usage of pointer variables as index, start, end and step variables
 ! CHECK-LABEL: loop_with_pointer_variables
+! NSW-LABEL:   loop_with_pointer_variables
 ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "s", fir.target}, %[[E_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "e", fir.target}, %[[ST_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "st", fir.target}) {
 subroutine loop_with_pointer_variables(s,e,st)
 ! CHECK:  %[[E_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEeptr.addr"}
 ! CHECK:  %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", fir.target, uniq_name = "_QFloop_with_pointer_variablesEi"}
 ! CHECK:  %[[I_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEiptr.addr"}
+! NSW:    %[[I_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEiptr.addr"}
 ! CHECK:  %[[S_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEsptr.addr"}
 ! CHECK:  %[[ST_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEstptr.addr"}
   integer, target :: i
@@ -182,6 +213,7 @@ subroutine loop_with_pointer_variables(s,e,st)
   stptr => st
 
 ! CHECK:  %[[I_PTR:.*]] = fir.load %[[I_PTR_REF]] : !fir.ref<!fir.ptr<i32>>
+! NSW:    %[[I_PTR:.*]] = fir.load %[[I_PTR_REF]] : !fir.ref<!fir.ptr<i32>>
 ! CHECK:  %[[S_PTR:.*]] = fir.load %[[S_PTR_REF]] : !fir.ref<!fir.ptr<i32>>
 ! CHECK:  %[[S:.*]] = fir.load %[[S_PTR]] : !fir.ptr<i32>
 ! CHECK:  %[[S_CVT:.*]] = fir.convert %[[S]] : (i32) -> index
@@ -193,14 +225,18 @@ subroutine loop_with_pointer_variables(s,e,st)
 ! CHECK:  %[[ST_CVT:.*]] = fir.convert %[[ST]] : (i32) -> index
 ! CHECK:  %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i32
 ! CHECK:  %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+! NSW:    %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
 ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]]
 ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do iptr=sptr,eptr,stptr
 ! CHECK:    fir.store %[[I_IV]] to %[[I_PTR]] : !fir.ptr<i32>
 ! CHECK:    %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index
+! NSW:      %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow<nsw> : index
 ! CHECK:    %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i32
 ! CHECK:    %[[I_IVLOAD:.*]] = fir.load %[[I_PTR]] : !fir.ptr<i32>
+! NSW:      %[[I_IVLOAD:.*]] = fir.load %[[I_PTR]] : !fir.ptr<i32>
 ! CHECK:    %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+! NSW:      %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
 ! CHECK:    fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   end do
 ! CHECK:  }
@@ -209,9 +245,11 @@ subroutine loop_with_pointer_variables(s,e,st)
 
 ! Test usage of non-default integer kind for loop control and loop index variable
 ! CHECK-LABEL: loop_with_non_default_integer
+! NSW-LABEL:   loop_with_non_default_integer
 ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref<i64> {fir.bindc_name = "s"}, %[[E_REF:.*]]: !fir.ref<i64> {fir.bindc_name = "e"}, %[[ST_REF:.*]]: !fir.ref<i64> {fir.bindc_name = "st"}) {
 subroutine loop_with_non_default_integer(s,e,st)
   ! CHECK: %[[I_REF:.*]] = fir.alloca i64 {bindc_name = "i", uniq_name = "_QFloop_with_non_default_integerEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i64 {bindc_name = "i", uniq_name = "_QFloop_with_non_default_integerEi"}
   integer(kind=8):: i
   ! CHECK: %[[S:.*]] = fir.load %[[S_REF]] : !fir.ref<i64>
   ! CHECK: %[[S_CVT:.*]] = fir.convert %[[S]] : (i64) -> index
@@ -223,14 +261,18 @@ subroutine loop_with_non_default_integer(s,e,st)
 
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i64
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i64) {
   do i=s,e,st
     ! CHECK: fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref<i64>
     ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index
+    ! NSW:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow<nsw> : index
     ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i64
     ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i64>
+    ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i64>
     ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i64
+    ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i64
     ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i64
   end do
   ! CHECK: }
diff --git a/flang/test/Lower/do_loop_unstructured.f90 b/flang/test/Lower/do_loop_unstructured.f90
index c6bdd4b64ce31..e1a669e09c9a8 100644
--- a/flang/test/Lower/do_loop_unstructured.f90
+++ b/flang/test/Lower/do_loop_unstructured.f90
@@ -1,5 +1,6 @@
 ! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW
 
 ! Tests for unstructured loops.
 
@@ -44,6 +45,36 @@ subroutine simple_unstructured()
 ! CHECK: ^[[EXIT]]:
 ! CHECK:   return
 
+! NSW-LABEL: simple_unstructured
+! NSW:   %[[TRIP_VAR_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructuredEi"}
+! NSW:   %[[ONE:.*]] = arith.constant 1 : i32
+! NSW:   %[[HUNDRED:.*]] = arith.constant 100 : i32
+! NSW:   %[[STEP_ONE:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP_ONE]] : i32
+! NSW:   %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP_ONE]] : i32
+! NSW:   fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER:.*]]
+! NSW: ^[[HEADER]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
+! NSW:   cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
+! NSW: ^[[BODY]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[STEP_ONE_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER]]
+! NSW: ^[[EXIT]]:
+! NSW:   return
+
 ! Test an unstructured loop with a step. Mostly similar to the previous one.
 ! Only difference is a non-unit step.
 subroutine simple_unstructured_with_step()
@@ -83,6 +114,36 @@ subroutine simple_unstructured_with_step()
 ! CHECK: ^[[EXIT]]:
 ! CHECK:   return
 
+! NSW-LABEL: simple_unstructured_with_step
+! NSW:   %[[TRIP_VAR_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructured_with_stepEi"}
+! NSW:   %[[ONE:.*]] = arith.constant 1 : i32
+! NSW:   %[[HUNDRED:.*]] = arith.constant 100 : i32
+! NSW:   %[[STEP:.*]] = arith.constant 2 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP]] : i32
+! NSW:   %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER:.*]]
+! NSW: ^[[HEADER]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
+! NSW:   cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
+! NSW: ^[[BODY]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[STEP_2:.*]] = arith.constant 2 : i32
+! NSW:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER]]
+! NSW: ^[[EXIT]]:
+! NSW:   return
+
 ! Test a three nested unstructured loop. Three nesting is the basic case where
 ! we have loops that are neither innermost or outermost.
 subroutine nested_unstructured()
@@ -180,6 +241,90 @@ subroutine nested_unstructured()
 ! CHECK: ^[[EXIT_I]]:
 ! CHECK:   return
 
+! NSW-LABEL: nested_unstructured
+! NSW:   %[[TRIP_VAR_K_REF:.*]] = fir.alloca i32
+! NSW:   %[[TRIP_VAR_J_REF:.*]] = fir.alloca i32
+! NSW:   %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_unstructuredEi"}
+! NSW:   %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_unstructuredEj"}
+! NSW:   %[[LOOP_VAR_K_REF:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFnested_unstructuredEk"}
+! NSW:   %[[I_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[I_END:.*]] = arith.constant 100 : i32
+! NSW:   %[[I_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32
+! NSW:   %[[TRIP_COUNT_I:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT_I]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_I:.*]]
+! NSW: ^[[HEADER_I]]:
+! NSW:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO_1:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND_I:.*]] = arith.cmpi sgt, %[[TRIP_VAR_I]], %[[ZERO_1]] : i32
+! NSW:   cf.cond_br %[[COND_I]], ^[[BODY_I:.*]], ^[[EXIT_I:.*]]
+! NSW: ^[[BODY_I]]:
+! NSW:   %[[J_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[J_END:.*]] = arith.constant 200 : i32
+! NSW:   %[[J_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP3:.*]] = arith.subi %[[J_END]], %[[J_START]] : i32
+! NSW:   %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[J_STEP]] : i32
+! NSW:   %[[TRIP_COUNT_J:.*]] = arith.divsi %[[TMP4]], %[[J_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT_J]] to %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[J_START]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_J:.*]]
+! NSW: ^[[HEADER_J]]:
+! NSW:   %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO_2:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND_J:.*]] = arith.cmpi sgt, %[[TRIP_VAR_J]], %[[ZERO_2]] : i32
+! NSW:   cf.cond_br %[[COND_J]], ^[[BODY_J:.*]], ^[[EXIT_J:.*]]
+! NSW: ^[[BODY_J]]:
+! NSW:   %[[K_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[K_END:.*]] = arith.constant 300 : i32
+! NSW:   %[[K_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP3:.*]] = arith.subi %[[K_END]], %[[K_START]] : i32
+! NSW:   %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[K_STEP]] : i32
+! NSW:   %[[TRIP_COUNT_K:.*]] = arith.divsi %[[TMP4]], %[[K_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT_K]] to %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[K_START]] to %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_K:.*]]
+! NSW: ^[[HEADER_K]]:
+! NSW:   %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO_2:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND_K:.*]] = arith.cmpi sgt, %[[TRIP_VAR_K]], %[[ZERO_2]] : i32
+! NSW:   cf.cond_br %[[COND_K]], ^[[BODY_K:.*]], ^[[EXIT_K:.*]]
+! NSW: ^[[BODY_K]]:
+! NSW:   %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_K_NEXT:.*]] = arith.subi %[[TRIP_VAR_K]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_K_NEXT]] to %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_K:.*]] = fir.load %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[K_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_K_NEXT]] to %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_K]]
+! NSW: ^[[EXIT_K]]:
+! NSW:   %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_J_NEXT:.*]] = arith.subi %[[TRIP_VAR_J]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_J_NEXT]] to %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[J_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_J_NEXT]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_J]]
+! NSW: ^[[EXIT_J]]:
+! NSW:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[I_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_I]]
+! NSW: ^[[EXIT_I]]:
+! NSW:   return
+
 ! Test the existence of a structured loop inside an unstructured loop.
 ! Only minimal checks are inserted for the structured loop.
 subroutine nested_structured_in_unstructured()
@@ -211,9 +356,12 @@ subroutine nested_structured_in_unstructured()
 ! CHECK:   cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
 ! CHECK: ^[[BODY]]:
 ! CHECK:   %{{.*}} = fir.do_loop %[[J_INDEX:[^ ]*]] =
-! CHECK-SAME: %{{.*}} to %{{.*}} step %{{[^ ]*}}
+! CHECK-SAME: %{{.*}} to %{{.*}} step %[[ST:[^ ]*]]
 ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %{{.*}}) -> (index, i32) {
 ! CHECK:     fir.store %[[J_IV]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! CHECK:     %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] : index
+! CHECK:     %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! CHECK:     %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} : i32
 ! CHECK:   }
 ! CHECK:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   %[[C1_3:.*]] = arith.constant 1 : i32
@@ -226,3 +374,42 @@ subroutine nested_structured_in_unstructured()
 ! CHECK:   cf.br ^[[HEADER]]
 ! CHECK: ^[[EXIT]]:
 ! CHECK:   return
+
+! NSW-LABEL: nested_structured_in_unstructured
+! NSW:   %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_structured_in_unstructuredEi"}
+! NSW:   %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_structured_in_unstructuredEj"}
+! NSW:   %[[I_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[I_END:.*]] = arith.constant 100 : i32
+! NSW:   %[[I_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32
+! NSW:   %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER:.*]]
+! NSW: ^[[HEADER]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
+! NSW:   cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
+! NSW: ^[[BODY]]:
+! NSW:   %{{.*}} = fir.do_loop %[[J_INDEX:[^ ]*]] =
+! NSW-SAME: %{{.*}} to %{{.*}} step %[[ST:[^ ]*]]
+! NSW-SAME: iter_args(%[[J_IV:.*]] = %{{.*}}) -> (index, i32) {
+! NSW:     fir.store %[[J_IV]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:     %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] overflow<nsw> : index
+! NSW:     %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:     %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} overflow<nsw> : i32
+! NSW:   }
+! NSW:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[C1_3:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[C1_3]] : i32
+! NSW:   fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[I_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER]]
+! NSW: ^[[EXIT]]:
+! NSW:   return
diff --git a/flang/test/Lower/infinite_loop.f90 b/flang/test/Lower/infinite_loop.f90
index 0450e2c4485fe..6942dda8d7a23 100644
--- a/flang/test/Lower/infinite_loop.f90
+++ b/flang/test/Lower/infinite_loop.f90
@@ -1,5 +1,6 @@
 ! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW
 
 ! Tests for infinite loop.
 
@@ -106,6 +107,39 @@ subroutine structured_loop_in_infinite(i)
 ! CHECK: ^[[RETURN]]:
 ! CHECK:   return
 
+! NSW-LABEL: structured_loop_in_infinite
+! NSW-SAME: %[[I_REF:.*]]: !fir.ref<i32>
+! NSW:  %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFstructured_loop_in_infiniteEj"}
+! NSW:  cf.br ^[[BODY1:.*]]
+! NSW: ^[[BODY1]]:
+! NSW:  %[[I:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+! NSW:  %[[C100:.*]] = arith.constant 100 : i32
+! NSW:  %[[COND:.*]] = arith.cmpi sgt, %[[I]], %[[C100]] : i32
+! NSW:  cf.cond_br %[[COND]], ^[[EXIT:.*]], ^[[BODY2:.*]]
+! NSW: ^[[EXIT]]:
+! NSW:  cf.br ^[[RETURN:.*]]
+! NSW: ^[[BODY2]]:
+! NSW:  %[[C1:.*]] = arith.constant 1 : i32
+! NSW:  %[[C1_INDEX:.*]] = fir.convert %[[C1]] : (i32) -> index
+! NSW:  %[[C10:.*]] = arith.constant 10 : i32
+! NSW:  %[[C10_INDEX:.*]] = fir.convert %[[C10]] : (i32) -> index
+! NSW:  %[[C1_1:.*]] = arith.constant 1 : index
+! NSW:  %[[J_LB:.*]] = fir.convert %[[C1_INDEX]] : (index) -> i32
+! NSW:  %[[J_FINAL:.*]]:2 = fir.do_loop %[[J:[^ ]*]] =
+! NSW-SAME: %[[C1_INDEX]] to %[[C10_INDEX]] step %[[C1_1]]
+! NSW-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) {
+! NSW:    fir.store %[[J_IV]] to %[[J_REF]] : !fir.ref<i32>
+! NSW:    %[[J_NEXT:.*]] = arith.addi %[[J]], %[[C1_1]] overflow<nsw> : index
+! NSW:    %[[J_STEPCAST:.*]] = fir.convert %[[C1_1]] : (index) -> i32
+! NSW:    %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
+! NSW:    %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] overflow<nsw> : i32
+! NSW:    fir.result %[[J_NEXT]], %[[J_IVINC]] : index, i32
+! NSW:  }
+! NSW:  fir.store %[[J_FINAL]]#1 to %[[J_REF]] : !fir.ref<i32>
+! NSW:  cf.br ^[[BODY1]]
+! NSW: ^[[RETURN]]:
+! NSW:   return
+
 subroutine empty_infinite_in_while(i)
   integer :: i
   do while (i .gt. 50)
diff --git a/flang/test/Lower/io-implied-do-fixes.f90 b/flang/test/Lower/io-implied-do-fixes.f90
index a309efa17f124..a6c115fa80ded 100644
--- a/flang/test/Lower/io-implied-do-fixes.f90
+++ b/flang/test/Lower/io-implied-do-fixes.f90
@@ -1,4 +1,5 @@
 ! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false %s -o - | FileCheck %s
+! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -integer-overflow %s -o - | FileCheck %s --check-prefix=NSW
 ! UNSUPPORTED: system-windows
 
 ! CHECK-LABEL: func @_QPido1
@@ -7,9 +8,23 @@
 ! CHECK: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
 ! CHECK:   %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
 ! CHECK:   fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.ptr<i32>
+! CHECK:   %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index
+! CHECK:   fir.result %[[J_VAL_NEXT]] : index
 ! CHECK: }
 ! CHECK: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
 ! CHECK: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.ptr<i32>
+
+! NSW-LABEL: func @_QPido1
+! NSW: %[[J_REF_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFido1Eiptr.addr"}
+! NSW: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.ptr<i32>>
+! NSW: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
+! NSW:   %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
+! NSW:   fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.ptr<i32>
+! NSW:   %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! NSW:   fir.result %[[J_VAL_NEXT]] : index
+! NSW: }
+! NSW: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
+! NSW: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.ptr<i32>
 subroutine ido1
   integer, pointer :: iptr
   integer, target :: itgt
@@ -23,9 +38,23 @@ subroutine ido1
 ! CHECK: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
 ! CHECK: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
 ! CHECK: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
+! CHECK: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index
+! CHECK: fir.result %[[J_VAL_NEXT]] : index
 ! CHECK: }
 ! CHECK: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
 ! CHECK: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32>
+
+! NSW-LABEL: func @_QPido2
+! NSW: %[[J_REF_ADDR:.*]] = fir.alloca !fir.heap<i32> {uniq_name = "_QFido2Eiptr.addr"}
+! NSW: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.heap<i32>>
+! NSW: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
+! NSW: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
+! NSW: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
+! NSW: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! NSW: fir.result %[[J_VAL_NEXT]] : index
+! NSW: }
+! NSW: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
+! NSW: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32>
 subroutine ido2
   integer, allocatable :: iptr
   allocate(iptr)
@@ -35,12 +64,32 @@ subroutine ido2
 ! CHECK-LABEL: func @_QPido3
 ! CHECK:  %[[J_REF_ADDR:.*]] = fir.alloca !fir.heap<i32> {uniq_name = "_QFido3Ej.addr"}
 ! CHECK:  %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.heap<i32>>
-! CHECK:  %[[J_VAL_FINAL:.*]]:2 = fir.iterate_while (%[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}}) and ({{.*}}) -> (index, i1) {
+! CHECK:  %[[J_VAL_FINAL:.*]]:2 = fir.iterate_while (%[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}}) and (%[[OK:.*]] = {{.*}}) -> (index, i1) {
 ! CHECK:    %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
 ! CHECK:    fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
+! CHECK:    %[[RES:.*]] = fir.if %[[OK]] -> (i1) {
+! CHECK:    }
+! CHECK:   %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index
+! CHECK:   %[[J_VAL_NEXT:.*]] = arith.select %[[RES]], %[[J_VAL_INC]], %[[J_VAL]] : index
+! CHECK:   fir.result %[[J_VAL_NEXT]], %[[RES]] : index, i1
 ! CHECK:  }
 ! CHECK:  %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]]#0 : (index) -> i32
 ! CHECK:  fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32
+
+! NSW-LABEL: func @_QPido3
+! NSW:  %[[J_REF_ADDR:.*]] = fir.alloca !fir.heap<i32> {uniq_name = "_QFido3Ej.addr"}
+! NSW:  %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.heap<i32>>
+! NSW:  %[[J_VAL_FINAL:.*]]:2 = fir.iterate_while (%[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}}) and (%[[OK:.*]] = {{.*}}) -> (index, i1) {
+! NSW:    %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
+! NSW:    fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
+! NSW:    %[[RES:.*]] = fir.if %[[OK]] -> (i1) {
+! NSW:    }
+! NSW:   %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! NSW:   %[[J_VAL_NEXT:.*]] = arith.select %[[RES]], %[[J_VAL_INC]], %[[J_VAL]] : index
+! NSW:   fir.result %[[J_VAL_NEXT]], %[[RES]] : index, i1
+! NSW:  }
+! NSW:  %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]]#0 : (index) -> i32
+! NSW:  fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32>
 subroutine ido3
   integer, allocatable :: j
   allocate(j)
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index ee2ff8562e9ff..70e4b8aa686e9 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -212,6 +212,11 @@ static llvm::cl::opt<std::string>
                          llvm::cl::desc("Override host target triple"),
                          llvm::cl::init(""));
 
+static llvm::cl::opt<bool>
+    setNSW("integer-overflow",
+           llvm::cl::desc("add nsw flag to internal operations"),
+           llvm::cl::init(false));
+
 #define FLANG_EXCLUDE_CODEGEN
 #include "flang/Tools/CLOptions.inc"
 
@@ -351,6 +356,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
   Fortran::lower::LoweringOptions loweringOptions{};
   loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder);
   loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
+  loweringOptions.setNoSignedWrap(setNSW);
   std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};
   auto burnside = Fortran::lower::LoweringBridge::create(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
@@ -428,6 +434,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
 
     // Add O2 optimizer pass pipeline.
     MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
+    config.NoSignedWrap = setNSW;
     fir::registerDefaultInlinerPass(config);
     fir::createDefaultFIROptimizerPassPipeline(pm, config);
   }

>From 240732263eccb4a8562367b0cf8cc3fc2845470b Mon Sep 17 00:00:00 2001
From: Yusuke MINATO <minato.yusuke at fujitsu.com>
Date: Fri, 10 May 2024 14:42:08 +0900
Subject: [PATCH 2/3] empty commit to re-run CI


>From 1d64007a4ef0b83c5226d9d60c2e29938106741a Mon Sep 17 00:00:00 2001
From: Yusuke MINATO <minato.yusuke at fujitsu.com>
Date: Tue, 14 May 2024 15:17:44 +0900
Subject: [PATCH 3/3] modify the name of the lowering option and its comment

---
 flang/include/flang/Lower/LoweringOptions.def | 4 ++--
 flang/include/flang/Tools/CLOptions.inc       | 2 +-
 flang/include/flang/Tools/CrossToolHelpers.h  | 2 +-
 flang/lib/Frontend/CompilerInvocation.cpp     | 2 +-
 flang/lib/Frontend/FrontendActions.cpp        | 4 ++--
 flang/lib/Lower/Bridge.cpp                    | 2 +-
 flang/lib/Lower/IO.cpp                        | 2 +-
 flang/tools/bbc/bbc.cpp                       | 4 ++--
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def
index 839d2b46249b0..7594a57a26291 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -34,9 +34,9 @@ ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0)
 /// On by default.
 ENUM_LOWERINGOPT(Underscoring, unsigned, 1, 1)
 
-/// If true, add nsw flags to arithmetic operations for integer.
+/// If true, add nsw flags to loop variable increments.
 /// Off by default.
-ENUM_LOWERINGOPT(NoSignedWrap, unsigned, 1, 0)
+ENUM_LOWERINGOPT(NSWOnLoopVarInc, unsigned, 1, 0)
 
 #undef LOWERINGOPT
 #undef ENUM_LOWERINGOPT
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 5ad7df714d348..e50a679365331 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -150,7 +150,7 @@ static void addCanonicalizerPassWithoutRegionSimplification(
 
 inline void addCfgConversionPass(
     mlir::PassManager &pm, const MLIRToLLVMPassPipelineConfig &config) {
-  if (config.NoSignedWrap)
+  if (config.NSWOnLoopVarInc)
     addNestedPassToAllTopLevelOperationsConditionally(
         pm, disableCfgConversion, fir::createCFGConversionPassWithNSW);
   else
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 583daa30289d6..77b68fc6187fa 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -122,7 +122,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
   bool NoSignedZerosFPMath =
       false; ///< Set no-signed-zeros-fp-math attribute for functions.
   bool UnsafeFPMath = false; ///< Set unsafe-fp-math attribute for functions.
-  bool NoSignedWrap = false; ///< Add nsw flag to numeric operations.
+  bool NSWOnLoopVarInc = false; ///< Add nsw flag to loop variable increments.
 };
 
 struct OffloadModuleOpts {
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index f72f181bade9c..87dd3a98d7da7 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1206,7 +1206,7 @@ bool CompilerInvocation::createFromArgs(
   // -flang-experimental-integer-overflow
   if (args.hasArg(
           clang::driver::options::OPT_flang_experimental_integer_overflow)) {
-    invoc.loweringOpts.setNoSignedWrap(true);
+    invoc.loweringOpts.setNSWOnLoopVarInc(true);
   }
 
   // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index deced43462607..2770b79f645d3 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -809,8 +809,8 @@ void CodeGenAction::generateLLVMIR() {
     config.VScaleMax = vsr->second;
   }
 
-  if (ci.getInvocation().getLoweringOpts().getNoSignedWrap())
-    config.NoSignedWrap = true;
+  if (ci.getInvocation().getLoweringOpts().getNSWOnLoopVarInc())
+    config.NSWOnLoopVarInc = true;
 
   // Create the pass pipeline
   fir::createMLIRToLLVMPassPipeline(pm, config, getCurrentFile());
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 58460a9cfb6cf..b8c4dde9daa7c 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2008,7 +2008,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     assert(!incrementLoopNestInfo.empty() && "empty loop nest");
     mlir::Location loc = toLocation();
     mlir::arith::IntegerOverflowFlags flags{};
-    if (getLoweringOptions().getNoSignedWrap())
+    if (getLoweringOptions().getNSWOnLoopVarInc())
       flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
     auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(
         builder->getContext(), flags);
diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp
index cc0347a4771eb..97ef991cb3990 100644
--- a/flang/lib/Lower/IO.cpp
+++ b/flang/lib/Lower/IO.cpp
@@ -929,7 +929,7 @@ static void genIoLoop(Fortran::lower::AbstractConverter &converter,
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Location loc = converter.getCurrentLocation();
   mlir::arith::IntegerOverflowFlags flags{};
-  if (converter.getLoweringOptions().getNoSignedWrap())
+  if (converter.getLoweringOptions().getNSWOnLoopVarInc())
     flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
   auto iofAttr =
       mlir::arith::IntegerOverflowFlagsAttr::get(builder.getContext(), flags);
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index 70e4b8aa686e9..8bc803f1709ca 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -356,7 +356,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
   Fortran::lower::LoweringOptions loweringOptions{};
   loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder);
   loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
-  loweringOptions.setNoSignedWrap(setNSW);
+  loweringOptions.setNSWOnLoopVarInc(setNSW);
   std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};
   auto burnside = Fortran::lower::LoweringBridge::create(
       ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),
@@ -434,7 +434,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
 
     // Add O2 optimizer pass pipeline.
     MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
-    config.NoSignedWrap = setNSW;
+    config.NSWOnLoopVarInc = setNSW;
     fir::registerDefaultInlinerPass(config);
     fir::createDefaultFIROptimizerPassPipeline(pm, config);
   }