[flang-commits] [flang] [flang][OpenMP] Rewrite min/max with more than 2 arguments (PR #146423)
Krzysztof Parzyszek via flang-commits
flang-commits at lists.llvm.org
Tue Jul 1 07:14:25 PDT 2025
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/146423
>From 4cfcf45d8250d648bd4ae0a9110b5034d2495149 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Fri, 20 Jun 2025 06:48:18 -0500
Subject: [PATCH 1/5] [flang][OpenMP] Rewrite min/max with more than 2
arguments
Given an atomic operation `w = max(w, x1, x2, ...)` rewrite it as
`w = max(w, max(x1, x2, ...))`. This will avoid unnecessary non-atomic
comparisons inside of the atomic operation (min/max are expanded inline).
In particular, if some of the x_i's are optional dummy parameters in the
containing function, this will avoid any presence tests within the
atomic operation.
Fixes https://github.com/llvm/llvm-project/issues/144838
---
flang/lib/Lower/OpenMP/Atomic.cpp | 279 +++++++++++++++++-
flang/test/Lower/OpenMP/atomic-update.f90 | 12 +-
.../Lower/OpenMP/max-optional-parameters.f90 | 24 ++
3 files changed, 302 insertions(+), 13 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/max-optional-parameters.f90
diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp
index 33a743f8f9dda..b7a223eff80c5 100644
--- a/flang/lib/Lower/OpenMP/Atomic.cpp
+++ b/flang/lib/Lower/OpenMP/Atomic.cpp
@@ -11,6 +11,8 @@
#include "flang/Evaluate/expression.h"
#include "flang/Evaluate/fold.h"
#include "flang/Evaluate/tools.h"
+#include "flang/Evaluate/traverse.h"
+#include "flang/Evaluate/type.h"
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/StatementContext.h"
@@ -41,6 +43,178 @@ namespace omp {
using namespace Fortran::lower::omp;
}
+namespace {
+// An example of a type that can be used to get the return value from
+// the visitor:
+// visitor(type_identity<Xyz>) -> result_type
+using SomeArgType = evaluate::Type<common::TypeCategory::Integer, 4>;
+
+struct GetProc
+ : public evaluate::Traverse<GetProc, const evaluate::ProcedureDesignator *,
+ false> {
+ using Result = const evaluate::ProcedureDesignator *;
+ using Base = evaluate::Traverse<GetProc, Result, false>;
+ GetProc() : Base(*this) {}
+
+ using Base::operator();
+
+ static Result Default() { return nullptr; }
+
+ Result operator()(const evaluate::ProcedureDesignator &p) const { return &p; }
+ static Result Combine(Result a, Result b) { return a != nullptr ? a : b; }
+};
+
+struct WithType {
+ WithType(const evaluate::DynamicType &t) : type(t) {
+ assert(type.category() != common::TypeCategory::Derived &&
+ "Type cannot be a derived type");
+ }
+
+ template <typename VisitorTy> //
+ auto visit(VisitorTy &&visitor) const
+ -> std::invoke_result_t<VisitorTy, SomeArgType> {
+ switch (type.category()) {
+ case common::TypeCategory::Integer:
+ switch (type.kind()) {
+ case 1:
+ return visitor(llvm::type_identity<evaluate::Type<Integer, 1>>{});
+ case 2:
+ return visitor(llvm::type_identity<evaluate::Type<Integer, 2>>{});
+ case 4:
+ return visitor(llvm::type_identity<evaluate::Type<Integer, 4>>{});
+ case 8:
+ return visitor(llvm::type_identity<evaluate::Type<Integer, 8>>{});
+ case 16:
+ return visitor(llvm::type_identity<evaluate::Type<Integer, 16>>{});
+ }
+ break;
+ case common::TypeCategory::Unsigned:
+ switch (type.kind()) {
+ case 1:
+ return visitor(llvm::type_identity<evaluate::Type<Unsigned, 1>>{});
+ case 2:
+ return visitor(llvm::type_identity<evaluate::Type<Unsigned, 2>>{});
+ case 4:
+ return visitor(llvm::type_identity<evaluate::Type<Unsigned, 4>>{});
+ case 8:
+ return visitor(llvm::type_identity<evaluate::Type<Unsigned, 8>>{});
+ case 16:
+ return visitor(llvm::type_identity<evaluate::Type<Unsigned, 16>>{});
+ }
+ break;
+ case common::TypeCategory::Real:
+ switch (type.kind()) {
+ case 2:
+ return visitor(llvm::type_identity<evaluate::Type<Real, 2>>{});
+ case 3:
+ return visitor(llvm::type_identity<evaluate::Type<Real, 3>>{});
+ case 4:
+ return visitor(llvm::type_identity<evaluate::Type<Real, 4>>{});
+ case 8:
+ return visitor(llvm::type_identity<evaluate::Type<Real, 8>>{});
+ case 10:
+ return visitor(llvm::type_identity<evaluate::Type<Real, 10>>{});
+ case 16:
+ return visitor(llvm::type_identity<evaluate::Type<Real, 16>>{});
+ }
+ break;
+ case common::TypeCategory::Complex:
+ switch (type.kind()) {
+ case 2:
+ return visitor(llvm::type_identity<evaluate::Type<Complex, 2>>{});
+ case 3:
+ return visitor(llvm::type_identity<evaluate::Type<Complex, 3>>{});
+ case 4:
+ return visitor(llvm::type_identity<evaluate::Type<Complex, 4>>{});
+ case 8:
+ return visitor(llvm::type_identity<evaluate::Type<Complex, 8>>{});
+ case 10:
+ return visitor(llvm::type_identity<evaluate::Type<Complex, 10>>{});
+ case 16:
+ return visitor(llvm::type_identity<evaluate::Type<Complex, 16>>{});
+ }
+ break;
+ case common::TypeCategory::Logical:
+ switch (type.kind()) {
+ case 1:
+ return visitor(llvm::type_identity<evaluate::Type<Logical, 1>>{});
+ case 2:
+ return visitor(llvm::type_identity<evaluate::Type<Logical, 2>>{});
+ case 4:
+ return visitor(llvm::type_identity<evaluate::Type<Logical, 4>>{});
+ case 8:
+ return visitor(llvm::type_identity<evaluate::Type<Logical, 8>>{});
+ }
+ break;
+ case common::TypeCategory::Character:
+ switch (type.kind()) {
+ case 1:
+ return visitor(llvm::type_identity<evaluate::Type<Character, 1>>{});
+ case 2:
+ return visitor(llvm::type_identity<evaluate::Type<Character, 2>>{});
+ case 4:
+ return visitor(llvm::type_identity<evaluate::Type<Character, 4>>{});
+ }
+ break;
+ case common::TypeCategory::Derived:
+ break;
+ }
+ llvm_unreachable("Unhandled type");
+ }
+
+ const evaluate::DynamicType &type;
+
+private:
+ // Shorter names.
+ static constexpr auto Character = common::TypeCategory::Character;
+ static constexpr auto Complex = common::TypeCategory::Complex;
+ static constexpr auto Derived = common::TypeCategory::Derived;
+ static constexpr auto Integer = common::TypeCategory::Integer;
+ static constexpr auto Logical = common::TypeCategory::Logical;
+ static constexpr auto Real = common::TypeCategory::Real;
+ static constexpr auto Unsigned = common::TypeCategory::Unsigned;
+};
+
+template <typename T, typename U = std::remove_const_t<T>>
+U AsRvalue(T &t) {
+ U copy{t};
+ return std::move(copy);
+}
+
+template <typename T>
+T &&AsRvalue(T &&t) {
+ return std::move(t);
+}
+
+struct ArgumentReplacer
+ : public evaluate::Traverse<ArgumentReplacer, bool, false> {
+ using Base = evaluate::Traverse<ArgumentReplacer, bool, false>;
+ using Result = bool;
+
+ Result Default() const { return false; }
+
+ ArgumentReplacer(evaluate::ActualArguments &&newArgs)
+ : Base(*this), args_(std::move(newArgs)) {}
+
+ using Base::operator();
+
+ template <typename T>
+ Result operator()(const evaluate::FunctionRef<T> &x) {
+ assert(!done_);
+ auto &mut = const_cast<evaluate::FunctionRef<T> &>(x);
+ mut.arguments() = args_;
+ done_ = true;
+ return true;
+ }
+
+ Result Combine(Result &&a, Result &&b) { return a || b; }
+
+private:
+ bool done_{false};
+ evaluate::ActualArguments &&args_;
+};
+} // namespace
+
[[maybe_unused]] static void
dumpAtomicAnalysis(const parser::OpenMPAtomicConstruct::Analysis &analysis) {
auto whatStr = [](int k) {
@@ -237,6 +411,85 @@ makeMemOrderAttr(lower::AbstractConverter &converter,
return nullptr;
}
+static bool replaceArgs(semantics::SomeExpr &expr,
+ evaluate::ActualArguments &&newArgs) {
+ return ArgumentReplacer(std::move(newArgs))(expr);
+}
+
+static semantics::SomeExpr makeCall(const evaluate::DynamicType &type,
+ const evaluate::ProcedureDesignator &proc,
+ const evaluate::ActualArguments &args) {
+ return WithType(type).visit([&](auto &&s) -> semantics::SomeExpr {
+ using Type = typename llvm::remove_cvref_t<decltype(s)>::type;
+ return evaluate::AsGenericExpr(
+ evaluate::FunctionRef<Type>(AsRvalue(proc), AsRvalue(args)));
+ });
+}
+
+static const evaluate::ProcedureDesignator &
+getProcedureDesignator(const semantics::SomeExpr &call) {
+ const evaluate::ProcedureDesignator *proc = GetProc{}(call);
+ assert(proc && "Call has no procedure designator");
+ return *proc;
+}
+
+static semantics::SomeExpr //
+genReducedMinMax(const semantics::SomeExpr &orig,
+ const semantics::SomeExpr *atomArg,
+ const std::vector<semantics::SomeExpr> &args) {
+ // Take a list of arguments to a min/max operation, e.g. [a0, a1, ...]
+ // One of the a_i's, say a_t, must be atom (or a convert of atom).
+ // Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate
+ // call = min/max(a_t, tmp).
+ // Return "call".
+
+ // The min/max intrinsics have 2 mandatory arguments, the rest is optional.
+ // Make sure that the "tmp = min/max(...)" doesn't promote an optional
+ // argument to a non-optional position. This could happen if a_t is at
+ // position 0 or 1.
+ if (args.size() <= 2)
+ return orig;
+
+ evaluate::ActualArguments nonAtoms;
+
+ auto AsActual = [](const semantics::SomeExpr &x) {
+ semantics::SomeExpr copy = x;
+ return evaluate::ActualArgument(std::move(copy));
+ };
+ // Semantic checks guarantee that the "atom" shows exactly once in the
+ // argument list (with potential conversions around it).
+ // For the first two (non-optional) arguments, if "atom" is among them,
+ // replace it with another occurrence of the other non-optional argument.
+ if (atomArg == &args[0]) {
+ // (atom, x, y...) -> (x, x, y...)
+ nonAtoms.push_back(AsActual(args[1]));
+ nonAtoms.push_back(AsActual(args[1]));
+ } else if (atomArg == &args[1]) {
+ // (x, atom, y...) -> (x, x, y...)
+ nonAtoms.push_back(AsActual(args[0]));
+ nonAtoms.push_back(AsActual(args[0]));
+ } else {
+ // (x, y, z...) -> unchanged
+ nonAtoms.push_back(AsActual(args[0]));
+ nonAtoms.push_back(AsActual(args[1]));
+ }
+
+ // The rest of arguments are optional, so we can just skip "atom".
+ for (size_t i = 2, e = args.size(); i != e; ++i) {
+ if (atomArg != &args[i])
+ nonAtoms.push_back(AsActual(args[i]));
+ }
+
+ // The type of the intermediate min/max is the same as the type of its
+ // arguments, which may be different from the type of the original
+ // expression. The original expression may have additional coverts.
+ auto tmp =
+ makeCall(*atomArg->GetType(), getProcedureDesignator(orig), nonAtoms);
+ semantics::SomeExpr call = orig;
+ replaceArgs(call, {AsActual(*atomArg), AsActual(tmp)});
+ return call;
+}
+
static mlir::Operation * //
genAtomicRead(lower::AbstractConverter &converter,
semantics::SemanticsContext &semaCtx, mlir::Location loc,
@@ -350,10 +603,28 @@ genAtomicUpdate(lower::AbstractConverter &converter,
mlir::Type atomType = fir::unwrapRefType(atomAddr.getType());
// This must exist by now.
- semantics::SomeExpr input = *evaluate::GetConvertInput(assign.rhs);
- std::vector<semantics::SomeExpr> args =
- evaluate::GetTopLevelOperation(input).second;
+ semantics::SomeExpr rhs = assign.rhs;
+ semantics::SomeExpr input = *evaluate::GetConvertInput(rhs);
+ auto [opcode, args] = evaluate::GetTopLevelOperation(input);
assert(!args.empty() && "Update operation without arguments");
+
+ const semantics::SomeExpr *atomArg = [&]() {
+ for (const semantics::SomeExpr &e : args) {
+ if (evaluate::IsSameOrConvertOf(e, atom))
+ return &e;
+ }
+ llvm_unreachable("Atomic variable not in argument list");
+ }();
+
+ if (opcode == evaluate::operation::Operator::Min ||
+ opcode == evaluate::operation::Operator::Max) {
+ // Min and max operations are expanded inline, so reduce them to
+ // operations with exactly two (non-optional) arguments.
+ rhs = genReducedMinMax(rhs, atomArg, args);
+ input = *evaluate::GetConvertInput(rhs);
+ std::tie(opcode, args) = evaluate::GetTopLevelOperation(input);
+ atomArg = nullptr; // No longer valid.
+ }
for (auto &arg : args) {
if (!evaluate::IsSameOrConvertOf(arg, atom)) {
mlir::Value val = fir::getBase(converter.genExprValue(arg, naCtx, &loc));
@@ -372,7 +643,7 @@ genAtomicUpdate(lower::AbstractConverter &converter,
converter.overrideExprValues(&overrides);
mlir::Value updated =
- fir::getBase(converter.genExprValue(assign.rhs, stmtCtx, &loc));
+ fir::getBase(converter.genExprValue(rhs, stmtCtx, &loc));
mlir::Value converted = builder.createConvert(loc, atomType, updated);
builder.create<mlir::omp::YieldOp>(loc, converted);
converter.resetExprOverrides();
diff --git a/flang/test/Lower/OpenMP/atomic-update.f90 b/flang/test/Lower/OpenMP/atomic-update.f90
index 3f840acefa6e8..f88bbea6fca85 100644
--- a/flang/test/Lower/OpenMP/atomic-update.f90
+++ b/flang/test/Lower/OpenMP/atomic-update.f90
@@ -107,8 +107,6 @@ program OmpAtomicUpdate
!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_Y_DECLARE]]#0 : !fir.ref<i32> {
!CHECK: ^bb0(%[[ARG:.*]]: i32):
!CHECK: {{.*}} = arith.cmpi sgt, %[[ARG]], {{.*}} : i32
-!CHECK: {{.*}} = arith.select {{.*}}, %[[ARG]], {{.*}} : i32
-!CHECK: {{.*}} = arith.cmpi sgt, {{.*}}
!CHECK: %[[TEMP:.*]] = arith.select {{.*}} : i32
!CHECK: omp.yield(%[[TEMP]] : i32)
!CHECK: }
@@ -177,13 +175,9 @@ program OmpAtomicUpdate
!CHECK: %[[VAL_Z_LOADED:.*]] = fir.load %[[VAL_Z_DECLARE]]#0 : !fir.ref<i32>
!CHECK: omp.atomic.update %[[VAL_W_DECLARE]]#0 : !fir.ref<i32> {
!CHECK: ^bb0(%[[ARG_W:.*]]: i32):
-!CHECK: %[[WX_CMP:.*]] = arith.cmpi sgt, %[[ARG_W]], %[[VAL_X_LOADED]] : i32
-!CHECK: %[[WX_MIN:.*]] = arith.select %[[WX_CMP]], %[[ARG_W]], %[[VAL_X_LOADED]] : i32
-!CHECK: %[[WXY_CMP:.*]] = arith.cmpi sgt, %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32
-!CHECK: %[[WXY_MIN:.*]] = arith.select %[[WXY_CMP]], %[[WX_MIN]], %[[VAL_Y_LOADED]] : i32
-!CHECK: %[[WXYZ_CMP:.*]] = arith.cmpi sgt, %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32
-!CHECK: %[[WXYZ_MIN:.*]] = arith.select %[[WXYZ_CMP]], %[[WXY_MIN]], %[[VAL_Z_LOADED]] : i32
-!CHECK: omp.yield(%[[WXYZ_MIN]] : i32)
+!CHECK: %[[W_CMP:.*]] = arith.cmpi sgt, %[[ARG_W]], {{.*}} : i32
+!CHECK: %[[WXYZ_MAX:.*]] = arith.select %[[W_CMP]], %[[ARG_W]], {{.*}} : i32
+!CHECK: omp.yield(%[[WXYZ_MAX]] : i32)
!CHECK: }
!$omp atomic update
w = max(w,x,y,z)
diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90
new file mode 100644
index 0000000000000..2bb6b110cb0ef
--- /dev/null
+++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90
@@ -0,0 +1,24 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 %s -o - | FileCheck %s
+
+! Check that the presence tests are done outside of the atomic update
+! construct.
+
+!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present
+!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) {
+!CHECK: fir.result {{.*}} : f32
+!CHECK: } else {
+!CHECK: fir.result {{.*}} : f32
+!CHECK: }
+!CHECK: omp.atomic.update {{.*}} : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32):
+!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]]
+!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]]
+!CHECK: omp.yield(%[[V11]] : f32)
+!CHECK: }
+
+subroutine f00(a, x, y)
+ real :: a
+ real, optional :: x, y
+ !$omp atomic update
+ a = max(x, a, y)
+end
>From 4302ed81021028baec7427028562802a516ea511 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 1 Jul 2025 07:38:45 -0500
Subject: [PATCH 2/5] Address review comments
---
flang/lib/Lower/OpenMP/Atomic.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp
index b7a223eff80c5..2ab91b239a3cc 100644
--- a/flang/lib/Lower/OpenMP/Atomic.cpp
+++ b/flang/lib/Lower/OpenMP/Atomic.cpp
@@ -157,6 +157,7 @@ struct WithType {
}
break;
case common::TypeCategory::Derived:
+ (void)Derived;
break;
}
llvm_unreachable("Unhandled type");
@@ -438,7 +439,7 @@ genReducedMinMax(const semantics::SomeExpr &orig,
const semantics::SomeExpr *atomArg,
const std::vector<semantics::SomeExpr> &args) {
// Take a list of arguments to a min/max operation, e.g. [a0, a1, ...]
- // One of the a_i's, say a_t, must be atom (or a convert of atom).
+ // One of the a_i's, say a_t, must be atomArg.
// Generate tmp = min/max(a0, a1, ... [except a_t]). Then generate
// call = min/max(a_t, tmp).
// Return "call".
@@ -608,13 +609,14 @@ genAtomicUpdate(lower::AbstractConverter &converter,
auto [opcode, args] = evaluate::GetTopLevelOperation(input);
assert(!args.empty() && "Update operation without arguments");
- const semantics::SomeExpr *atomArg = [&]() {
+ // Pass args as an argument to avoid capturing a structured binding.
+ const semantics::SomeExpr *atomArg = [&](auto &args) {
for (const semantics::SomeExpr &e : args) {
if (evaluate::IsSameOrConvertOf(e, atom))
return &e;
}
llvm_unreachable("Atomic variable not in argument list");
- }();
+ }(args);
if (opcode == evaluate::operation::Operator::Min ||
opcode == evaluate::operation::Operator::Max) {
>From 16df2e0eb775fce156db6f5fc62509026a0b2bc5 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 1 Jul 2025 08:01:23 -0500
Subject: [PATCH 3/5] Check for non-atomic operations, add min
---
.../Lower/OpenMP/max-optional-parameters.f90 | 58 ++++++++++++++++---
1 file changed, 51 insertions(+), 7 deletions(-)
diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90
index 2bb6b110cb0ef..57a432eeeecec 100644
--- a/flang/test/Lower/OpenMP/max-optional-parameters.f90
+++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90
@@ -3,16 +3,27 @@
! Check that the presence tests are done outside of the atomic update
! construct.
-!CHECK: %[[IS_PRESENT:[a-z0-9]+]] = fir.is_present
-!CHECK: %[[IF_VAL:[a-z0-9]+]] = fir.if %[[IS_PRESENT]] -> (f32) {
-!CHECK: fir.result {{.*}} : f32
+!CHECK-LABEL: func.func @_QPf00
+!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0
+!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0
+!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0
+!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref<f32>
+!CHECK: %[[V5:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref<f32>
+!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref<f32>) -> i1
+!CHECK: %[[V7:[0-9]+]] = arith.cmpf ogt, %[[V4]], %[[V5]] fastmath<contract> : f32
+!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : f32
+!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (f32) {
+!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref<f32>
+!CHECK: %[[V11:[0-9]+]] = arith.cmpf ogt, %[[V8]], %[[V10]] fastmath<contract> : f32
+!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : f32
+!CHECK: fir.result %[[V12]] : f32
!CHECK: } else {
-!CHECK: fir.result {{.*}} : f32
+!CHECK: fir.result %[[V8]] : f32
!CHECK: }
-!CHECK: omp.atomic.update {{.*}} : !fir.ref<f32> {
+!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref<f32> {
!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32):
-!CHECK: %[[V10:[a-z0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[IF_VAL]]
-!CHECK: %[[V11:[a-z0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[IF_VAL]]
+!CHECK: %[[V10:[0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[V9]] fastmath<contract> : f32
+!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : f32
!CHECK: omp.yield(%[[V11]] : f32)
!CHECK: }
@@ -22,3 +33,36 @@ subroutine f00(a, x, y)
!$omp atomic update
a = max(x, a, y)
end
+
+
+!CHECK-LABEL: func.func @_QPf01
+!CHECK: %[[VAL_A:[0-9]+]]:2 = hlfir.declare %arg0 dummy_scope %0
+!CHECK: %[[VAL_X:[0-9]+]]:2 = hlfir.declare %arg1 dummy_scope %0
+!CHECK: %[[VAL_Y:[0-9]+]]:2 = hlfir.declare %arg2 dummy_scope %0
+!CHECK: %[[V4:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref<i32>
+!CHECK: %[[V5:[0-9]+]] = fir.load %[[VAL_X]]#0 : !fir.ref<i32>
+!CHECK: %[[V6:[0-9]+]] = fir.is_present %[[VAL_Y]]#0 : (!fir.ref<i32>) -> i1
+!CHECK: %[[V7:[0-9]+]] = arith.cmpi slt, %[[V4]], %[[V5]] : i32
+!CHECK: %[[V8:[0-9]+]] = arith.select %[[V7]], %[[V4]], %[[V5]] : i32
+!CHECK: %[[V9:[0-9]+]] = fir.if %[[V6]] -> (i32) {
+!CHECK: %[[V10:[0-9]+]] = fir.load %[[VAL_Y]]#0 : !fir.ref<i32>
+!CHECK: %[[V11:[0-9]+]] = arith.cmpi slt, %[[V8]], %[[V10]] : i32
+!CHECK: %[[V12:[0-9]+]] = arith.select %[[V11]], %[[V8]], %[[V10]] : i32
+!CHECK: fir.result %[[V12]] : i32
+!CHECK: } else {
+!CHECK: fir.result %[[V8]] : i32
+!CHECK: }
+!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref<i32> {
+!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: i32):
+!CHECK: %[[V10:[0-9]+]] = arith.cmpi slt, %[[ARG]], %[[V9]] : i32
+!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : i32
+!CHECK: omp.yield(%[[V11]] : i32)
+!CHECK: }
+
+subroutine f01(a, x, y)
+ integer :: a
+ integer, optional :: x, y
+ !$omp atomic update
+ a = min(x, a, y)
+end
+
>From 6c45f7c32a4919ee950703377fa1b5c8f0257fc2 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 1 Jul 2025 09:13:35 -0500
Subject: [PATCH 4/5] Fix test
---
flang/test/Lower/OpenMP/max-optional-parameters.f90 | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/max-optional-parameters.f90
index 57a432eeeecec..418a3cad8cdaf 100644
--- a/flang/test/Lower/OpenMP/max-optional-parameters.f90
+++ b/flang/test/Lower/OpenMP/max-optional-parameters.f90
@@ -20,7 +20,7 @@
!CHECK: } else {
!CHECK: fir.result %[[V8]] : f32
!CHECK: }
-!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref<f32> {
+!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_A]]#0 : !fir.ref<f32> {
!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: f32):
!CHECK: %[[V10:[0-9]+]] = arith.cmpf ogt, %[[ARG]], %[[V9]] fastmath<contract> : f32
!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : f32
@@ -52,7 +52,7 @@ subroutine f00(a, x, y)
!CHECK: } else {
!CHECK: fir.result %[[V8]] : i32
!CHECK: }
-!CHECK: omp.atomic.update %[[VAL_A]]#0 : !fir.ref<i32> {
+!CHECK: omp.atomic.update memory_order(relaxed) %[[VAL_A]]#0 : !fir.ref<i32> {
!CHECK: ^bb0(%[[ARG:[a-z0-9]+]]: i32):
!CHECK: %[[V10:[0-9]+]] = arith.cmpi slt, %[[ARG]], %[[V9]] : i32
!CHECK: %[[V11:[0-9]+]] = arith.select %[[V10]], %[[ARG]], %[[V9]] : i32
>From 3aa831fe921cb2ff40d24206dc2d98909f681e94 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 1 Jul 2025 09:13:59 -0500
Subject: [PATCH 5/5] Rename test
---
...max-optional-parameters.f90 => minmax-optional-parameters.f90} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename flang/test/Lower/OpenMP/{max-optional-parameters.f90 => minmax-optional-parameters.f90} (100%)
diff --git a/flang/test/Lower/OpenMP/max-optional-parameters.f90 b/flang/test/Lower/OpenMP/minmax-optional-parameters.f90
similarity index 100%
rename from flang/test/Lower/OpenMP/max-optional-parameters.f90
rename to flang/test/Lower/OpenMP/minmax-optional-parameters.f90
More information about the flang-commits
mailing list