[flang-commits] [flang] e7b6660 - [flang] Add -ffast-math and -Ofast

Fri Dec 9 11:57:00 PST 2022

Author: Tom Eccles
Date: 2022-12-09T19:55:58Z
New Revision: e7b6660243d1a4548f7aaac992b777ef0f0ba5b7

URL: https://github.com/llvm/llvm-project/commit/e7b6660243d1a4548f7aaac992b777ef0f0ba5b7
DIFF: https://github.com/llvm/llvm-project/commit/e7b6660243d1a4548f7aaac992b777ef0f0ba5b7.diff

LOG: [flang] Add -ffast-math and -Ofast

clang -cc1 accepts -Ofast. I did not add it to flang -fc1 because that
would be redundant: the compiler driver always resolves -Ofast into
-O3 -ffast-math (I added a test for this).

-menable-no-infs is removed from the frontend-forwarding test because if
all of the fast-math component flags are present, they are resolved
into the fast-math flag. Instead, -menable-no-infs is tested in the
fast-math test.

Specifying -ffast-math to the compiler driver causes linker invocations
to include crtfastmath.o.

RFC: https://discourse.llvm.org/t/rfc-the-meaning-of-ofast/66554

Differential Revision: https://reviews.llvm.org/D138675

Added: 
    flang/test/Driver/fast_math.f90

Modified: 
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/Flang.cpp
    flang/docs/FlangDriver.md
    flang/lib/Frontend/CompilerInvocation.cpp
    flang/test/Driver/driver-help-hidden.f90
    flang/test/Driver/driver-help.f90
    flang/test/Driver/frontend-forwarding.f90
    flang/test/Lower/fast-math-arithmetic.f90

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index fd0ca5f94a515..ba2010e743459 100644

--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -741,7 +741,7 @@ def ObjC : Flag<["-"], "ObjC">, Flags<[NoXarchOption]>,
   HelpText<"Treat source input files as Objective-C inputs">;
 def O : Joined<["-"], "O">, Group<O_Group>, Flags<[CC1Option,FC1Option]>;
 def O_flag : Flag<["-"], "O">, Flags<[CC1Option,FC1Option]>, Alias<O>, AliasArgs<["1"]>;
-def Ofast : Joined<["-"], "Ofast">, Group<O_Group>, Flags<[CC1Option]>;
+def Ofast : Joined<["-"], "Ofast">, Group<O_Group>, Flags<[CC1Option, FlangOption]>;
 def P : Flag<["-"], "P">, Flags<[CC1Option,FlangOption,FC1Option]>, Group<Preprocessor_Group>,
   HelpText<"Disable linemarker output in -E mode">,
   MarshallingInfoNegativeFlag<PreprocessorOutputOpts<"ShowLineMarkers">>;
@@ -1603,7 +1603,7 @@ def ffp_exception_behavior_EQ : Joined<["-"], "ffp-exception-behavior=">, Group<
   MarshallingInfoEnum<LangOpts<"FPExceptionMode">, "FPE_Default">;
 defm fast_math : BoolFOption<"fast-math",
   LangOpts<"FastMath">, DefaultFalse,
-  PosFlag<SetTrue, [CC1Option], "Allow aggressive, lossy floating-point optimizations",
+  PosFlag<SetTrue, [CC1Option, FC1Option, FlangOption], "Allow aggressive, lossy floating-point optimizations",
           [cl_fast_relaxed_math.KeyPath]>,
   NegFlag<SetFalse>>;
 defm math_errno : BoolFOption<"math-errno",

diff  --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index b2b08f960842e..4ee046be9ea97 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -176,12 +176,43 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,
     case options::OPT_fno_reciprocal_math:
       ReciprocalMath = false;
       break;
+    case options::OPT_Ofast:
+      [[fallthrough]];
+    case options::OPT_ffast_math:
+      HonorINFs = false;
+      HonorNaNs = false;
+      AssociativeMath = true;
+      ReciprocalMath = true;
+      ApproxFunc = true;
+      SignedZeros = false;
+      FPContract = "fast";
+      break;
+    case options::OPT_fno_fast_math:
+      HonorINFs = true;
+      HonorNaNs = true;
+      AssociativeMath = false;
+      ReciprocalMath = false;
+      ApproxFunc = false;
+      SignedZeros = true;
+      // -fno-fast-math should undo -ffast-math so I return FPContract to the
+      // default. It is important to check it is "fast" (the default) so that
+      // -ffp-contract=off -fno-fast-math --> -ffp-contract=off
+      if (FPContract == "fast")
+        FPContract = "";
+      break;
     }
 
     // If we handled this option claim it
     A->claim();
   }
 
+  if (!HonorINFs && !HonorNaNs && AssociativeMath && ReciprocalMath &&
+      ApproxFunc && !SignedZeros &&
+      (FPContract == "fast" || FPContract == "")) {
+    CmdArgs.push_back("-ffast-math");
+    return;
+  }
+
   if (!FPContract.empty())
     CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract));
 
@@ -295,6 +326,8 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
     if (A->getOption().matches(options::OPT_O4)) {
       CmdArgs.push_back("-O3");
       D.Diag(diag::warn_O4_is_O3);
+    } else if (A->getOption().matches(options::OPT_Ofast)) {
+      CmdArgs.push_back("-O3");
     } else {
       A->render(Args, CmdArgs);
     }

diff  --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md
index 7e182b75d477e..d3db110565f42 100644
--- a/flang/docs/FlangDriver.md
+++ b/flang/docs/FlangDriver.md
@@ -544,3 +544,56 @@ into `flang-new` as built-in middle-end passes.
 See the
 [`WritingAnLLVMNewPMPass`](https://llvm.org/docs/WritingAnLLVMNewPMPass.html#id9)
 documentation for more details.
+
+## Ofast and Fast Math
+`-Ofast` in Flang means `-O3 -ffast-math`. `-fstack-arrays` will be added to
+`-Ofast` in the future (https://github.com/llvm/llvm-project/issues/59231).
+
+`-ffast-math` means the following:
+ - `-fno-honor-infinities`
+ - `-fno-honor-nans`
+ - `-fassociative-math`
+ - `-freciprocal-math`
+ - `-fapprox-func`
+ - `-fno-signed-zeros`
+ - `-ffp-contract=fast`
+
+These correspond to LLVM IR Fast Math attributes:
+https://llvm.org/docs/LangRef.html#fast-math-flags
+
+When `-ffast-math` is specified, any linker steps generated by the compiler
+driver will also link to `crtfastmath.o`, which adds a static constructor
+that sets the FTZ/DAZ bits in MXCSR, affecting not only the current
+compilation unit but all static and shared libraries included in the
+program. Setting these bits causes denormal floating point numbers to be flushed
+to zero.
+
+### Comparison with GCC/GFortran
+GCC/GFortran translate `-Ofast` to
+`-O3 -ffast-math -fstack-arrays -fno-semantic-interposition`. `-fstack-arrays`
+is TODO for Flang.
+`-fno-semantic-interposition` is not used because clang does not enable this as
+part of `-Ofast` as the default behaviour is similar.
+
+GCC/GFortran has a wider definition of `-ffast-math`: also including
+`-fno-trapping-math`,  `-fno-rounding-math`, and  `-fsignaling-nans`; these
+aren't included in Flang because Flang currently has no support for strict
+floating point and so always acts as though these flags were specified.
+
+GCC/GFortran will also set flush-to-zero mode: linking `crtfastmath.o`, the same
+as Flang.
+
+### Comparison with nvfortran
+nvfortran defines `-fast` as
+`-O2 -Munroll=c:1 -Mnoframe -Mlre -Mpre -Mvect=simd -Mcache_align -Mflushz -Mvect`.
+ - `-O2 -Munroll=c:1 -Mlre -Mautoinline -Mpre -Mvect=simd` affect code
+   optimization. `flang -O3` should enable all optimizations for execution time,
+   similarly to `clang -O3`. The `-O3` pipeline has passes that perform
+   transformations like inlining, vectorisation, unrolling, etc. Additionally,
+   the GVN and LICM passes perform redundancy elimination like `-Mpre` and `-Mlre`.
+ - `-Mnoframe`: the equivalent flag would be `-fomit-frame-pointer`. This flag
+   is not yet supported in Flang and so Flang follows GFortran in not including
+   this in `-Ofast`. There is no plan to include this flag as part of `-Ofast`.
+ - `-Mcache_align`: there is no equivalent flag in Flang or Clang.
+ - `-Mflushz`: flush-to-zero mode - when `-ffast-math` is specified, Flang will
+   link to `crtfastmath.o` to ensure denormal numbers are flushed to zero.

diff  --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index e5fa38efae170..33ec6af3a2276 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -728,6 +728,16 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
     opts.ReciprocalMath = true;
   }
 
+  if (args.getLastArg(clang::driver::options::OPT_ffast_math)) {
+    opts.NoHonorInfs = true;
+    opts.NoHonorNaNs = true;
+    opts.AssociativeMath = true;
+    opts.ReciprocalMath = true;
+    opts.ApproxFunc = true;
+    opts.NoSignedZeros = true;
+    opts.setFPContractMode(LangOptions::FPM_Fast);
+  }
+
   return true;
 }
 

diff  --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index e079bb66cfa5c..a5821435f39b8 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -29,6 +29,7 @@
 ! CHECK-NEXT: -fdefault-double-8     Set the default double precision kind to an 8 byte wide type
 ! CHECK-NEXT: -fdefault-integer-8    Set the default integer kind to an 8 byte wide type
 ! CHECK-NEXT: -fdefault-real-8       Set the default real kind to an 8 byte wide type
+! CHECK-NEXT: -ffast-math            Allow aggressive, lossy floating-point optimizations
 ! CHECK-NEXT: -ffixed-form           Process source files in fixed form
 ! CHECK-NEXT: -ffixed-line-length=<value>
 ! CHECK-NEXT: Use <value> as character line width in fixed mode

diff  --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index d54ed7b288bc5..dffc20fc667f3 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -29,6 +29,7 @@
 ! HELP-NEXT: -fdefault-double-8     Set the default double precision kind to an 8 byte wide type
 ! HELP-NEXT: -fdefault-integer-8    Set the default integer kind to an 8 byte wide type
 ! HELP-NEXT: -fdefault-real-8       Set the default real kind to an 8 byte wide type
+! HELP-NEXT: -ffast-math            Allow aggressive, lossy floating-point optimizations
 ! HELP-NEXT: -ffixed-form           Process source files in fixed form
 ! HELP-NEXT: -ffixed-line-length=<value>
 ! HELP-NEXT: Use <value> as character line width in fixed mode
@@ -108,6 +109,7 @@
 ! HELP-FC1-NEXT: -fdefault-double-8  Set the default double precision kind to an 8 byte wide type
 ! HELP-FC1-NEXT: -fdefault-integer-8 Set the default integer kind to an 8 byte wide type
 ! HELP-FC1-NEXT: -fdefault-real-8    Set the default real kind to an 8 byte wide type
+! HELP-FC1-NEXT: -ffast-math            Allow aggressive, lossy floating-point optimizations
 ! HELP-FC1-NEXT: -ffixed-form           Process source files in fixed form
 ! HELP-FC1-NEXT: -ffixed-line-length=<value>
 ! HELP-FC1-NEXT: Use <value> as character line width in fixed mode

diff  --git a/flang/test/Driver/fast_math.f90 b/flang/test/Driver/fast_math.f90
new file mode 100644
index 0000000000000..11c7e9beea619
--- /dev/null
+++ b/flang/test/Driver/fast_math.f90
@@ -0,0 +1,65 @@
+! Test for correct forwarding of fast-math flags from the compiler driver to the
+! frontend driver
+
+! -Ofast => -ffast-math -O3
+! RUN: %flang -Ofast -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:     | FileCheck --check-prefix=CHECK-OFAST %s
+! CHECK-OFAST: -fc1
+! CHECK-OFAST-SAME: -ffast-math
+! CHECK-OFAST-SAME: -O3
+
+! TODO: update once -fstack-arrays is added
+! RUN: %flang -fstack-arrays -fsyntax-only %s -o %t 2>&1 \
+! RUN:     | FileCheck --check-prefix=CHECK-STACK-ARRAYS %s
+! CHECK-STACK-ARRAYS: warning: argument unused during compilation: '-fstack-arrays'
+
+! -Ofast -fno-fast-math => -O3
+! RUN: %flang -Ofast -fno-fast-math -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:     | FileCheck --check-prefix=CHECK-OFAST-NO-FAST %s
+! CHECK-OFAST-NO-FAST: -fc1
+! CHECK-OFAST-NO-FAST-NOT: -ffast-math
+! CHECK-OFAST-NO-FAST-SAME: -O3
+
+! -ffast-math => -ffast-math
+! RUN: %flang -ffast-math -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:     | FileCheck --check-prefix=CHECK-FFAST %s
+! CHECK-FFAST: -fc1
+! CHECK-FFAST-SAME: -ffast-math
+
+! (component flags) => -ffast-math
+! RUN: %flang -fsyntax-only -### %s -o %t \
+! RUN:     -fno-honor-infinities \
+! RUN:     -fno-honor-nans \
+! RUN:     -fassociative-math \
+! RUN:     -freciprocal-math \
+! RUN:     -fapprox-func \
+! RUN:     -fno-signed-zeros \
+! RUN:     -ffp-contract=fast \
+! RUN:     2>&1 | FileCheck --check-prefix=CHECK-FROM-COMPS %s
+! CHECK-FROM-COMPS: -fc1
+! CHECK-FROM-COMPS-SAME: -ffast-math
+
+! -ffast-math (followed by an alteration) => (component flags)
+! RUN: %flang -ffast-math -fhonor-infinities -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:     | FileCheck --check-prefix=CHECK-TO-COMPS %s
+! CHECK-TO-COMPS: -fc1
+! CHECK-TO-COMPS-SAME: -ffp-contract=fast
+! CHECK-TO-COMPS-SAME: -menable-no-nans
+! CHECK-TO-COMPS-SAME: -fapprox-func
+! CHECK-TO-COMPS-SAME: -fno-signed-zeros
+! CHECK-TO-COMPS-SAME: -mreassociate
+! CHECK-TO-COMPS-SAME: -freciprocal-math
+
+! Check that -fno-fast-math doesn't clobber -ffp-contract
+! RUN: %flang -ffp-contract=off -fno-fast-math -fsyntax-only -### %s -o %t 2>&1 \
+! RUN:     | FileCheck --check-prefix=CHECK-CONTRACT %s
+! CHECK-CONTRACT: -fc1
+! CHECK-CONTRACT-SAME: -ffp-contract=off
+
+! Check that -ffast-math causes us to link to crtfastmath.o
+! UNSUPPORTED: system-windows
+! RUN: %flang -ffast-math -### %s -o %t 2>&1 \
+! RUN:     | FileCheck --check-prefix=CHECK-CRT %s
+! CHECK-CRT: crtbeginS.o
+! CHECK-CRT-SAME: crtfastmath.o
+! CHECK-CRT-SAME: crtendS.o

diff  --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90
index 2c81b518ddd33..beb2a85e76f54 100644
--- a/flang/test/Driver/frontend-forwarding.f90
+++ b/flang/test/Driver/frontend-forwarding.f90
@@ -9,7 +9,6 @@
 ! RUN:     -flarge-sizes \
 ! RUN:     -fconvert=little-endian \
 ! RUN:     -ffp-contract=fast \
-! RUN:     -fno-honor-infinities \
 ! RUN:     -fno-honor-nans \
 ! RUN:     -fapprox-func \
 ! RUN:     -fno-signed-zeros \
@@ -27,7 +26,6 @@
 ! CHECK: "-fdefault-real-8"
 ! CHECK: "-flarge-sizes"
 ! CHECK: "-ffp-contract=fast"
-! CHECK: "-menable-no-infs"
 ! CHECK: "-menable-no-nans"
 ! CHECK: "-fapprox-func"
 ! CHECK: "-fno-signed-zeros"

diff  --git a/flang/test/Lower/fast-math-arithmetic.f90 b/flang/test/Lower/fast-math-arithmetic.f90
index 3a94606c52823..fe90fdc3e6246 100644
--- a/flang/test/Lower/fast-math-arithmetic.f90
+++ b/flang/test/Lower/fast-math-arithmetic.f90
@@ -6,6 +6,7 @@
 ! RUN: %flang_fc1 -emit-fir -mreassociate -ffp-contract=off %s -o - 2>&1 | FileCheck --check-prefixes=REASSOC,ALL %s
 ! RUN: %flang_fc1 -emit-fir -freciprocal-math -ffp-contract=off %s -o - 2>&1 | FileCheck --check-prefixes=ARCP,ALL %s
 ! RUN: %flang_fc1 -emit-fir -ffp-contract=fast -menable-no-infs -menable-no-nans -fapprox-func -fno-signed-zeros -mreassociate -freciprocal-math %s -o - 2>&1 | FileCheck --check-prefixes=FAST,ALL %s
+! RUN: %flang_fc1 -emit-fir -ffast-math %s -o - 2>&1 | FileCheck --check-prefixes=FAST,ALL %s
 
 ! ALL-LABEL: func.func @_QPtest
 subroutine test(x)