[clang] [flang] [Flang] Add -ffast-real-mod back for further control of MOD optimizations (PR #167118)

Fri Nov 21 14:32:47 PST 2025

https://github.com/mjklemm updated https://github.com/llvm/llvm-project/pull/167118

>From bb7b9b63845c6cd3135169be2e27076060ce92a9 Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Sat, 8 Nov 2025 11:03:17 +0100
Subject: [PATCH 1/5] [Flang] Add -ffast-real-mod back for further control of
 MOD optimizations

---
 clang/include/clang/Options/Options.td        |  3 +
 clang/lib/Driver/ToolChains/Flang.cpp         |  2 +
 flang/include/flang/Support/LangOptions.def   |  2 +-
 flang/lib/Frontend/CompilerInvocation.cpp     |  5 +-
 flang/lib/Frontend/FrontendActions.cpp        |  4 +-
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 10 +-
 flang/test/Driver/fast-real-mod.f90           |  2 +
 flang/test/Lower/Intrinsics/fast-real-mod.f90 | 94 +++++++++----------
 8 files changed, 65 insertions(+), 57 deletions(-)

diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 786acd6abbd21..efdc44d5ec48d 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -2782,6 +2782,9 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">
   Group<f_Group>;
 def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
 def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
+def ffast_real_mod : Flag<["-"], "ffast-real-mod">,
+  Group<f_Group>, Visibility<[FlangOption, FC1Option]>,
+  HelpText<"Enable optimization of MOD for REAL types">;
 def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">,
   Group<f_Group>, Visibility<[FlangOption, FC1Option]>,
   HelpText<"Disable optimization of MOD for REAL types in presence of -ffast-math">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 270904de544d6..909d7440dc362 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -822,6 +822,8 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,
                                          complexRangeKindToStr(Range)));
   }
 
+  if (Args.hasArg(options::OPT_ffast_real_mod))
+    CmdArgs.push_back("-ffast-real-mod");
   if (Args.hasArg(options::OPT_fno_fast_real_mod))
     CmdArgs.push_back("-fno-fast-real-mod");
 
diff --git a/flang/include/flang/Support/LangOptions.def b/flang/include/flang/Support/LangOptions.def
index e7185c836f45b..e310ecf37a52d 100644
--- a/flang/include/flang/Support/LangOptions.def
+++ b/flang/include/flang/Support/LangOptions.def
@@ -61,7 +61,7 @@ LANGOPT(OpenMPNoNestedParallelism, 1, 0)
 /// Use SIMD only OpenMP support.
 LANGOPT(OpenMPSimd, 1, false)
 /// Enable fast MOD operations for REAL
-LANGOPT(NoFastRealMod, 1, false)
+LANGOPT(FastRealMod, 1, false)
 LANGOPT(VScaleMin, 32, 0)  ///< Minimum vscale range value
 LANGOPT(VScaleMax, 32, 0)  ///< Maximum vscale range value
 
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 893121fe01f27..9e48dd6373faf 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1403,11 +1403,14 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
     opts.ReciprocalMath = true;
     opts.ApproxFunc = true;
     opts.NoSignedZeros = true;
+    opts.FastRealMod = true;
     opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast);
   }
 
+  if (args.hasArg(clang::options::OPT_ffast_real_mod))
+    opts.FastRealMod = true;
   if (args.hasArg(clang::options::OPT_fno_fast_real_mod))
-    opts.NoFastRealMod = true;
+    opts.FastRealMod = false;
 
   return true;
 }
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 159d08a2797b3..ddf125f9bb216 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -277,11 +277,11 @@ bool CodeGenAction::beginSourceFileAction() {
                               ci.getInvocation().getLangOpts().OpenMPVersion);
   }
 
-  if (ci.getInvocation().getLangOpts().NoFastRealMod) {
+  if (ci.getInvocation().getLangOpts().FastRealMod) {
     mlir::ModuleOp mod = lb.getModule();
     mod.getOperation()->setAttr(
         mlir::StringAttr::get(mod.getContext(),
-                              llvm::Twine{"fir.no_fast_real_mod"}),
+                              llvm::Twine{"fir.fast_real_mod"}),
         mlir::BoolAttr::get(mod.getContext(), true));
   }
 
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 60dc02474faf6..1714d48980a85 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -6526,11 +6526,9 @@ static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
                                      llvm::ArrayRef<mlir::Value> args) {
   auto mod = builder.getModule();
-  bool dontUseFastRealMod = false;
-  bool canUseApprox = mlir::arith::bitEnumContainsAny(
-      builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn);
-  if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod"))
-    dontUseFastRealMod = attr.getValue();
+  bool useFastRealMod = false;
+  if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod"))
+    useFastRealMod = attr.getValue();
 
   assert(args.size() == 2);
   if (resultType.isUnsignedInteger()) {
@@ -6543,7 +6541,7 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
   if (mlir::isa<mlir::IntegerType>(resultType))
     return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);
 
-  if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) {
+  if (resultType.isFloat() && useFastRealMod) {
     // Treat MOD as an approximate function and code-gen inline code
     // instead of calling into the Fortran runtime library.
     return builder.createConvert(loc, resultType,
diff --git a/flang/test/Driver/fast-real-mod.f90 b/flang/test/Driver/fast-real-mod.f90
index 4ea9b26e64753..8184f334c3d85 100644
--- a/flang/test/Driver/fast-real-mod.f90
+++ b/flang/test/Driver/fast-real-mod.f90
@@ -1,5 +1,7 @@
+! RUN: %flang -ffast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD
 ! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD
 
+! CHECK-FAST-REAL-MOD: "-ffast-real-mod"
 ! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod"
 
 program test
diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90
index f80f7203ad1a2..8613c0eb3c2f6 100644
--- a/flang/test/Lower/Intrinsics/fast-real-mod.f90
+++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90
@@ -1,24 +1,24 @@
-! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
+! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FRM%if target=x86_64{{.*}} %{,CHECK-FRM-KIND10%}%if flang-supports-f128-math %{,CHECK-FRM-KIND16%}
+! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FRM%if target=x86_64{{.*}} %{,CHECK-FRM-KIND10%}%if flang-supports-f128-math %{,CHECK-FRM-KIND16%}
 ! RUN: %flang_fc1 -ffast-math -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}
 
-! TODO: check line that fir.fast_real_mod is not there
-! CHECK-NFRM: module attributes {{{.*}}fir.no_fast_real_mod = true{{.*}}}
+! CHECK,CHECK-FRM: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}
 
 ! CHECK-LABEL: @_QPmod_real4
 subroutine mod_real4(r, a, p)
     implicit none
     real(kind=4) :: r, a, p
-! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
-! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
-! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
-! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
-! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f32
-! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
-! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
-! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f32
-! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f32
-! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
+! CHECK-FRM: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FRM: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FRM: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FRM: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FRM: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f32
+! CHECK-FRM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
+! CHECK-FRM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
+! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f32
+! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f32
+! CHECK-FRM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
 ! CHECK-NFRM: fir.call @_FortranAModReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f32, f32, !fir.ref<i8>, i32) -> f32
     r = mod(a, p)
 end subroutine mod_real4
@@ -27,17 +27,17 @@ end subroutine mod_real4
 subroutine mod_real8(r, a, p)
     implicit none
     real(kind=8) :: r, a, p
-! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
-! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
-! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
-! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
-! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f64
-! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
-! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
-! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f64
-! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f64
-! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
+! CHECK-FRM: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FRM: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FRM: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FRM: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FRM: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f64
+! CHECK-FRM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
+! CHECK-FRM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
+! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f64
+! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f64
+! CHECK-FRM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
 ! CHECK-NFRM: fir.call @_FortranAModReal8(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f64, f64, !fir.ref<i8>, i32) -> f64
     r = mod(a, p)
 end subroutine mod_real8
@@ -47,17 +47,17 @@ subroutine mod_real10(r, a, p)
     implicit none
     integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10)
     real(kind=kind10) :: r, a, p
-! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
-! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
-! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
-! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
-! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f80
-! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
-! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
-! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f80
-! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f80
-! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
+! CHECK-FRM-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FRM-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FRM-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FRM-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FRM-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FRM-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f80
+! CHECK-FRM-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
+! CHECK-FRM-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
+! CHECK-FRM-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f80
+! CHECK-FRM-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f80
+! CHECK-FRM-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
 ! CHECK-NFRM-KIND10: fir.call @_FortranAModReal10(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f80, f80, !fir.ref<i8>, i32) -> f80
     r = mod(a, p)
 end subroutine mod_real10
@@ -67,17 +67,17 @@ subroutine mod_real16(r, a, p)
     implicit none
     integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
     real(kind=kind16) :: r, a, p
-! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
-! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
-! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
-! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
-! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f128
-! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
-! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
-! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f128
-! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f128
-! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
+! CHECK-FRM-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FRM-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FRM-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FRM-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FRM-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FRM-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f128
+! CHECK-FRM-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
+! CHECK-FRM-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
+! CHECK-FRM-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f128
+! CHECK-FRM-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f128
+! CHECK-FRM-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
 ! CHECK-NFRM-KIND16: fir.call @_FortranAModReal16(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128
     r = mod(a, p)
 end subroutine mod_real16

>From e449ea8ce31a8f306a97807632df858485242d00 Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Thu, 13 Nov 2025 16:53:04 +0100
Subject: [PATCH 2/5] Update test according to PR reviews

---
 flang/test/Lower/Intrinsics/fast-real-mod.f90 | 75 +++++++++++++++----
 1 file changed, 61 insertions(+), 14 deletions(-)

diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90
index 8613c0eb3c2f6..6ecbb9b03f701 100644
--- a/flang/test/Lower/Intrinsics/fast-real-mod.f90
+++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90
@@ -1,8 +1,11 @@
 ! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FRM%if target=x86_64{{.*}} %{,CHECK-FRM-KIND10%}%if flang-supports-f128-math %{,CHECK-FRM-KIND16%}
-! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FRM%if target=x86_64{{.*}} %{,CHECK-FRM-KIND10%}%if flang-supports-f128-math %{,CHECK-FRM-KIND16%}
+! RUN: %flang_fc1 -ffast-real-mod -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}
+! RUN: %flang_fc1 -fno-fast-real-mod -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FRM%if target=x86_64{{.*}} %{,CHECK-FRM-KIND10%}%if flang-supports-f128-math %{,CHECK-FRM-KIND16%}
+! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FM%if target=x86_64{{.*}} %{,CHECK-FM-KIND10%}%if flang-supports-f128-math %{,CHECK-FM-KIND16%}
 ! RUN: %flang_fc1 -ffast-math -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}
 
-! CHECK,CHECK-FRM: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}
+! CHECK-FM: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}
+! CHECK-FRM: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}
 
 ! CHECK-LABEL: @_QPmod_real4
 subroutine mod_real4(r, a, p)
@@ -13,12 +16,23 @@ subroutine mod_real4(r, a, p)
 ! CHECK-FRM: %[[R:.*]] = fir.declare{{.*}}r"
 ! CHECK-FRM: %[[A_LOAD:.*]] = fir.load %[[A]]
 ! CHECK-FRM: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f32
+! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f32
 ! CHECK-FRM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
 ! CHECK-FRM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
-! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f32
-! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f32
+! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f32
+! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f32
 ! CHECK-FRM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
+! CHECK-FM: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FM: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FM: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FM: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FM: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f32
+! CHECK-FM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
+! CHECK-FM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
+! CHECK-FM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f32
+! CHECK-FM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f32
+! CHECK-FM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
 ! CHECK-NFRM: fir.call @_FortranAModReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f32, f32, !fir.ref<i8>, i32) -> f32
     r = mod(a, p)
 end subroutine mod_real4
@@ -32,12 +46,23 @@ subroutine mod_real8(r, a, p)
 ! CHECK-FRM: %[[R:.*]] = fir.declare{{.*}}r"
 ! CHECK-FRM: %[[A_LOAD:.*]] = fir.load %[[A]]
 ! CHECK-FRM: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f64
+! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f64
 ! CHECK-FRM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
 ! CHECK-FRM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
-! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f64
-! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f64
+! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f64
+! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f64
 ! CHECK-FRM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
+! CHECK-FM: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FM: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FM: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FM: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FM: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f64
+! CHECK-FM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
+! CHECK-FM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
+! CHECK-FM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f64
+! CHECK-FM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f64
+! CHECK-FM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
 ! CHECK-NFRM: fir.call @_FortranAModReal8(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f64, f64, !fir.ref<i8>, i32) -> f64
     r = mod(a, p)
 end subroutine mod_real8
@@ -52,12 +77,23 @@ subroutine mod_real10(r, a, p)
 ! CHECK-FRM-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
 ! CHECK-FRM-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
 ! CHECK-FRM-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK-FRM-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f80
+! CHECK-FRM-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f80
 ! CHECK-FRM-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
 ! CHECK-FRM-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
-! CHECK-FRM-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f80
-! CHECK-FRM-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f80
+! CHECK-FRM-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f80
+! CHECK-FRM-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f80
 ! CHECK-FRM-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
+! CHECK-FM-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FM-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FM-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FM-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FM-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FM-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f80
+! CHECK-FM-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
+! CHECK-FM-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
+! CHECK-FM-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f80
+! CHECK-FM-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f80
+! CHECK-FM-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
 ! CHECK-NFRM-KIND10: fir.call @_FortranAModReal10(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f80, f80, !fir.ref<i8>, i32) -> f80
     r = mod(a, p)
 end subroutine mod_real10
@@ -72,12 +108,23 @@ subroutine mod_real16(r, a, p)
 ! CHECK-FRM-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
 ! CHECK-FRM-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
 ! CHECK-FRM-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
-! CHECK-FRM-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<{{.*}}> : f128
+! CHECK-FRM-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f128
 ! CHECK-FRM-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
 ! CHECK-FRM-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
-! CHECK-FRM-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<{{.*}}> : f128
-! CHECK-FRM-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<{{.*}}> : f128
+! CHECK-FRM-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f128
+! CHECK-FRM-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f128
 ! CHECK-FRM-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
+! CHECK-FM-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
+! CHECK-FM-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
+! CHECK-FM-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
+! CHECK-FM-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
+! CHECK-FM-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
+! CHECK-FM-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f128
+! CHECK-FM-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
+! CHECK-FM-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
+! CHECK-FM-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f128
+! CHECK-FM-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f128
+! CHECK-FM-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
 ! CHECK-NFRM-KIND16: fir.call @_FortranAModReal16(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128
     r = mod(a, p)
 end subroutine mod_real16

>From 8d4d1812423cd49159bd8e864bf18e77d16711bd Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Thu, 13 Nov 2025 16:54:12 +0100
Subject: [PATCH 3/5] Fix flag handling

---
 flang/lib/Frontend/CompilerInvocation.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 9e48dd6373faf..0c32f3914e04b 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1407,10 +1407,14 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
     opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast);
   }
 
-  if (args.hasArg(clang::options::OPT_ffast_real_mod))
-    opts.FastRealMod = true;
-  if (args.hasArg(clang::options::OPT_fno_fast_real_mod))
-    opts.FastRealMod = false;
+  if (llvm::opt::Arg *arg =
+          args.getLastArg(clang::options::OPT_ffast_real_mod,
+                          clang::options::OPT_fno_fast_real_mod)) {
+    if (arg->getOption().matches(clang::options::OPT_ffast_real_mod))
+      opts.FastRealMod = true;
+    if (arg->getOption().matches(clang::options::OPT_fno_fast_real_mod))
+      opts.FastRealMod = false;
+  }
 
   return true;
 }

>From a1664f6d30fa443ff4292003c2815c7c4d52a4d2 Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Thu, 20 Nov 2025 14:38:39 +0100
Subject: [PATCH 4/5] Fix option handling in driver

---
 clang/lib/Driver/ToolChains/Flang.cpp | 13 +++++++++----
 flang/test/Driver/fast-real-mod.f90   |  4 ++++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 909d7440dc362..d9d7073458183 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -822,10 +822,15 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,
                                          complexRangeKindToStr(Range)));
   }
 
-  if (Args.hasArg(options::OPT_ffast_real_mod))
-    CmdArgs.push_back("-ffast-real-mod");
-  if (Args.hasArg(options::OPT_fno_fast_real_mod))
-    CmdArgs.push_back("-fno-fast-real-mod");
+  if (llvm::opt::Arg *A =
+          Args.getLastArg(clang::options::OPT_ffast_real_mod,
+                          clang::options::OPT_fno_fast_real_mod)) {
+    if (A->getOption().matches(clang::options::OPT_ffast_real_mod))
+      CmdArgs.push_back("-ffast-real-mod");
+    else if (A->getOption().matches(
+                 clang::options::OPT_fno_fast_real_mod))
+      CmdArgs.push_back("-fno-fast-real-mod");
+  }
 
   if (!HonorINFs && !HonorNaNs && AssociativeMath && ReciprocalMath &&
       ApproxFunc && !SignedZeros &&
diff --git a/flang/test/Driver/fast-real-mod.f90 b/flang/test/Driver/fast-real-mod.f90
index 8184f334c3d85..1ab28ae8838ea 100644
--- a/flang/test/Driver/fast-real-mod.f90
+++ b/flang/test/Driver/fast-real-mod.f90
@@ -1,8 +1,12 @@
 ! RUN: %flang -ffast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD
 ! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD
+! RUN: %flang -fno-fast-real-mod -ffast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD
+! RUN: %flang -ffast-real-mod -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD
 
 ! CHECK-FAST-REAL-MOD: "-ffast-real-mod"
+! CHECK-FAST-REAL-MOD-NOT: "-fno-fast-real-mod"
 ! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod"
+! CHECK-NO-FAST-REAL-MOD-NOT: "-fast-real-mod"
 
 program test
     ! nothing to be done in here

>From 3580c3e17408626fa801bfd171bda9f4ad8b4ab8 Mon Sep 17 00:00:00 2001
From: Michael Klemm <michael.klemm at amd.com>
Date: Fri, 21 Nov 2025 23:32:22 +0100
Subject: [PATCH 5/5] Make clang-format happy

---
 clang/lib/Driver/ToolChains/Flang.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index d9d7073458183..cc4755cd6a9b0 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -827,8 +827,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,
                           clang::options::OPT_fno_fast_real_mod)) {
     if (A->getOption().matches(clang::options::OPT_ffast_real_mod))
       CmdArgs.push_back("-ffast-real-mod");
-    else if (A->getOption().matches(
-                 clang::options::OPT_fno_fast_real_mod))
+    else if (A->getOption().matches(clang::options::OPT_fno_fast_real_mod))
       CmdArgs.push_back("-fno-fast-real-mod");
   }