[flang-commits] [flang] [llvm] [mlir] [flang][llvm][OpenMP][OpenACC] Add implicit casts to omp.atomic and acc.atomic (PR #131603)
via flang-commits
flang-commits at lists.llvm.org
Thu May 1 05:00:32 PDT 2025
https://github.com/NimishMishra updated https://github.com/llvm/llvm-project/pull/131603
>From d00abc7a026b5b17ac2ec6e10cfae2d866288d51 Mon Sep 17 00:00:00 2001
From: Nimish Mishra <neelam.nimish at gmail.com>
Date: Thu, 1 May 2025 17:29:56 +0530
Subject: [PATCH] [flang][llvm][OpenMP] Add implicit casts to omp.atomic
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 56 ++++++++++++++++++-
.../Todo/atomic-capture-implicit-cast.f90 | 48 ++++++++++++++++
.../Lower/OpenMP/atomic-implicit-cast.f90 | 56 +++++++++++++++++++
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 31 ----------
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 21 +++----
5 files changed, 164 insertions(+), 48 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90
create mode 100644 flang/test/Lower/OpenMP/atomic-implicit-cast.f90
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index f099028c23323..fe5aa994a76bd 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2889,9 +2889,55 @@ static void genAtomicRead(lower::AbstractConverter &converter,
fir::getBase(converter.genExprAddr(fromExpr, stmtCtx));
mlir::Value toAddress = fir::getBase(converter.genExprAddr(
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
- genAtomicCaptureStatement(converter, fromAddress, toAddress,
- leftHandClauseList, rightHandClauseList,
- elementType, loc);
+
+ if (fromAddress.getType() != toAddress.getType()) {
+ // Source and destination types differ: emit an implicit cast.
+ mlir::Type toType = fir::unwrapRefType(toAddress.getType());
+ mlir::Type fromType = fir::unwrapRefType(fromAddress.getType());
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ auto oldIP = builder.saveInsertionPoint();
+ builder.setInsertionPointToStart(builder.getAllocaBlock());
+ mlir::Value alloca = builder.create<fir::AllocaOp>(loc, fromType);
+ builder.restoreInsertionPoint(oldIP);
+ genAtomicCaptureStatement(converter, fromAddress, alloca,
+ leftHandClauseList, rightHandClauseList,
+ elementType, loc);
+ auto load = builder.create<fir::LoadOp>(loc, alloca);
+ if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
+ // Emit an additional `ExtractValueOp` if `fromAddress` is of complex
+ // type, but `toAddress` is not.
+
+ auto extract = builder.create<fir::ExtractValueOp>(
+ loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
+ builder.getArrayAttr(
+ builder.getIntegerAttr(builder.getIndexType(), 0)));
+ auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
+ builder.create<fir::StoreOp>(loc, cvt, toAddress);
+ } else if (!fir::isa_complex(fromType) && fir::isa_complex(toType)) {
+ // Emit an additional `InsertValueOp` if `toAddress` is of complex
+ // type, but `fromAddress` is not.
+ mlir::Value undef = builder.create<fir::UndefOp>(loc, toType);
+ mlir::Type complexEleTy =
+ mlir::cast<mlir::ComplexType>(toType).getElementType();
+ mlir::Value cvt = builder.create<fir::ConvertOp>(loc, complexEleTy, load);
+ mlir::Value zero = builder.createRealZeroConstant(loc, complexEleTy);
+ mlir::Value idx0 = builder.create<fir::InsertValueOp>(
+ loc, toType, undef, cvt,
+ builder.getArrayAttr(
+ builder.getIntegerAttr(builder.getIndexType(), 0)));
+ mlir::Value idx1 = builder.create<fir::InsertValueOp>(
+ loc, toType, idx0, zero,
+ builder.getArrayAttr(
+ builder.getIntegerAttr(builder.getIndexType(), 1)));
+ builder.create<fir::StoreOp>(loc, idx1, toAddress);
+ } else {
+ auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
+ builder.create<fir::StoreOp>(loc, cvt, toAddress);
+ }
+ } else
+ genAtomicCaptureStatement(converter, fromAddress, toAddress,
+ leftHandClauseList, rightHandClauseList,
+ elementType, loc);
}
/// Processes an atomic construct with update clause.
@@ -2976,6 +3022,10 @@ static void genAtomicCapture(lower::AbstractConverter &converter,
mlir::Type stmt2VarType =
fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType();
+ // Check if an implicit type cast is needed.
+ if (stmt1VarType != stmt2VarType)
+ TODO(loc, "atomic capture requiring implicit type casts");
+
mlir::Operation *atomicCaptureOp = nullptr;
mlir::IntegerAttr hint = nullptr;
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
diff --git a/flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90 b/flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90
new file mode 100644
index 0000000000000..5b61f1169308f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90
@@ -0,0 +1,48 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: atomic capture requiring implicit type casts
+subroutine capture_with_convert_f32_to_i32()
+ implicit none
+ integer :: k, v, i
+
+ k = 1
+ v = 0
+
+ !$omp atomic capture
+ v = k
+ k = (i + 1) * 3.14
+ !$omp end atomic
+end subroutine
+
+subroutine capture_with_convert_i32_to_f64()
+ real(8) :: x
+ integer :: v
+ x = 1.0
+ v = 0
+ !$omp atomic capture
+ v = x
+ x = v
+ !$omp end atomic
+end subroutine capture_with_convert_i32_to_f64
+
+subroutine capture_with_convert_f64_to_i32()
+ integer :: x
+ real(8) :: v
+ x = 1
+ v = 0
+ !$omp atomic capture
+ x = v
+ v = x
+ !$omp end atomic
+end subroutine capture_with_convert_f64_to_i32
+
+subroutine capture_with_convert_i32_to_f32()
+ real(4) :: x
+ integer :: v
+ x = 1.0
+ v = 0
+ !$omp atomic capture
+ v = x
+ x = x + v
+ !$omp end atomic
+end subroutine capture_with_convert_i32_to_f32
diff --git a/flang/test/Lower/OpenMP/atomic-implicit-cast.f90 b/flang/test/Lower/OpenMP/atomic-implicit-cast.f90
new file mode 100644
index 0000000000000..75f1cbfc979b9
--- /dev/null
+++ b/flang/test/Lower/OpenMP/atomic-implicit-cast.f90
@@ -0,0 +1,56 @@
+! REQUIRES : openmp_runtime
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! CHECK: func.func @_QPatomic_implicit_cast_read() {
+subroutine atomic_implicit_cast_read
+! CHECK: %[[ALLOCA3:.*]] = fir.alloca complex<f32>
+! CHECK: %[[ALLOCA2:.*]] = fir.alloca complex<f32>
+! CHECK: %[[ALLOCA1:.*]] = fir.alloca i32
+! CHECK: %[[ALLOCA0:.*]] = fir.alloca f32
+
+! CHECK: %[[M:.*]] = fir.alloca complex<f64> {bindc_name = "m", uniq_name = "_QFatomic_implicit_cast_readEm"}
+! CHECK: %[[M_DECL:.*]]:2 = hlfir.declare %[[M]] {uniq_name = "_QFatomic_implicit_cast_readEm"} : (!fir.ref<complex<f64>>) -> (!fir.ref<complex<f64>>, !fir.ref<complex<f64>>)
+! CHECK: %[[W:.*]] = fir.alloca complex<f32> {bindc_name = "w", uniq_name = "_QFatomic_implicit_cast_readEw"}
+! CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[W]] {uniq_name = "_QFatomic_implicit_cast_readEw"} : (!fir.ref<complex<f32>>) -> (!fir.ref<complex<f32>>, !fir.ref<complex<f32>>)
+! CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFatomic_implicit_cast_readEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFatomic_implicit_cast_readEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFatomic_implicit_cast_readEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFatomic_implicit_cast_readEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[Z:.*]] = fir.alloca f64 {bindc_name = "z", uniq_name = "_QFatomic_implicit_cast_readEz"}
+! CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFatomic_implicit_cast_readEz"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+ integer :: x
+ real :: y
+ double precision :: z
+ complex :: w
+ complex(8) :: m
+
+! CHECK: omp.atomic.read %[[ALLOCA0:.*]] = %[[Y_DECL]]#0 : !fir.ref<f32>, !fir.ref<f32>, f32
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA0]] : !fir.ref<f32>
+! CHECK: %[[CVT:.*]] = fir.convert %[[LOAD]] : (f32) -> i32
+! CHECK: fir.store %[[CVT]] to %[[X_DECL]]#0 : !fir.ref<i32>
+ !$omp atomic read
+ x = y
+
+! CHECK: omp.atomic.read %[[ALLOCA1:.*]] = %[[X_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>, i32
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA1]] : !fir.ref<i32>
+! CHECK: %[[CVT:.*]] = fir.convert %[[LOAD]] : (i32) -> f64
+! CHECK: fir.store %[[CVT]] to %[[Z_DECL]]#0 : !fir.ref<f64>
+ !$omp atomic read
+ z = x
+
+! CHECK: omp.atomic.read %[[ALLOCA2:.*]] = %[[W_DECL]]#0 : !fir.ref<complex<f32>>, !fir.ref<complex<f32>>, complex<f32>
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA2]] : !fir.ref<complex<f32>>
+! CHECK: %[[EXTRACT:.*]] = fir.extract_value %[[LOAD]], [0 : index] : (complex<f32>) -> f32
+! CHECK: %[[CVT:.*]] = fir.convert %[[EXTRACT]] : (f32) -> i32
+! CHECK: fir.store %[[CVT]] to %[[X_DECL]]#0 : !fir.ref<i32>
+ !$omp atomic read
+ x = w
+
+! CHECK: omp.atomic.read %[[ALLOCA3:.*]] = %[[W_DECL]]#0 : !fir.ref<complex<f32>>, !fir.ref<complex<f32>>, complex<f32>
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA3]] : !fir.ref<complex<f32>>
+! CHECK: %[[CVT:.*]] = fir.convert %[[LOAD]] : (complex<f32>) -> complex<f64>
+! CHECK: fir.store %[[CVT]] to %[[M_DECL]]#0 : !fir.ref<complex<f64>>
+ !$omp atomic read
+ m = w
+end subroutine
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 63d7171b06156..06dc1184e7cf5 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -268,33 +268,6 @@ computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
return Result;
}
-/// Emit an implicit cast to convert \p XRead to type of variable \p V
-static llvm::Value *emitImplicitCast(IRBuilder<> &Builder, llvm::Value *XRead,
- llvm::Value *V) {
- // TODO: Add this functionality to the `AtomicInfo` interface
- llvm::Type *XReadType = XRead->getType();
- llvm::Type *VType = V->getType();
- if (llvm::AllocaInst *vAlloca = dyn_cast<llvm::AllocaInst>(V))
- VType = vAlloca->getAllocatedType();
-
- if (XReadType->isStructTy() && VType->isStructTy())
- // No need to extract or convert. A direct
- // `store` will suffice.
- return XRead;
-
- if (XReadType->isStructTy())
- XRead = Builder.CreateExtractValue(XRead, /*Idxs=*/0);
- if (VType->isIntegerTy() && XReadType->isFloatingPointTy())
- XRead = Builder.CreateFPToSI(XRead, VType);
- else if (VType->isFloatingPointTy() && XReadType->isIntegerTy())
- XRead = Builder.CreateSIToFP(XRead, VType);
- else if (VType->isIntegerTy() && XReadType->isIntegerTy())
- XRead = Builder.CreateIntCast(XRead, VType, true);
- else if (VType->isFloatingPointTy() && XReadType->isFloatingPointTy())
- XRead = Builder.CreateFPCast(XRead, VType);
- return XRead;
-}
-
/// Make \p Source branch to \p Target.
///
/// Handles two situations:
@@ -8685,8 +8658,6 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
}
}
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
- if (XRead->getType() != V.Var->getType())
- XRead = emitImplicitCast(Builder, XRead, V.Var);
Builder.CreateStore(XRead, V.Var, V.IsVolatile);
return Builder.saveIP();
}
@@ -8983,8 +8954,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
return AtomicResult.takeError();
Value *CapturedVal =
(IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
- if (CapturedVal->getType() != V.Var->getType())
- CapturedVal = emitImplicitCast(Builder, CapturedVal, V.Var);
Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 02a08eec74016..32f0ba5b105ff 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -1396,42 +1396,35 @@ llvm.func @omp_atomic_read_implicit_cast () {
//CHECK: call void @__atomic_load(i64 8, ptr %[[X_ELEMENT]], ptr %[[ATOMIC_LOAD_TEMP]], i32 0)
//CHECK: %[[LOAD:.*]] = load { float, float }, ptr %[[ATOMIC_LOAD_TEMP]], align 8
-//CHECK: %[[EXT:.*]] = extractvalue { float, float } %[[LOAD]], 0
-//CHECK: store float %[[EXT]], ptr %[[Y]], align 4
+//CHECK: store { float, float } %[[LOAD]], ptr %[[Y]], align 4
omp.atomic.read %3 = %17 : !llvm.ptr, !llvm.ptr, !llvm.struct<(f32, f32)>
//CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[Z]] monotonic, align 4
//CHECK: %[[CAST:.*]] = bitcast i32 %[[ATOMIC_LOAD_TEMP]] to float
-//CHECK: %[[LOAD:.*]] = fpext float %[[CAST]] to double
-//CHECK: store double %[[LOAD]], ptr %[[Y]], align 8
+//CHECK: store float %[[CAST]], ptr %[[Y]], align 4
omp.atomic.read %3 = %1 : !llvm.ptr, !llvm.ptr, f32
//CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[W]] monotonic, align 4
-//CHECK: %[[LOAD:.*]] = sitofp i32 %[[ATOMIC_LOAD_TEMP]] to double
-//CHECK: store double %[[LOAD]], ptr %[[Y]], align 8
+//CHECK: store i32 %[[ATOMIC_LOAD_TEMP]], ptr %[[Y]], align 4
omp.atomic.read %3 = %7 : !llvm.ptr, !llvm.ptr, i32
//CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i64, ptr %[[Y]] monotonic, align 4
//CHECK: %[[CAST:.*]] = bitcast i64 %[[ATOMIC_LOAD_TEMP]] to double
-//CHECK: %[[LOAD:.*]] = fptrunc double %[[CAST]] to float
-//CHECK: store float %[[LOAD]], ptr %[[Z]], align 4
+//CHECK: store double %[[CAST]], ptr %[[Z]], align 8
omp.atomic.read %1 = %3 : !llvm.ptr, !llvm.ptr, f64
//CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[W]] monotonic, align 4
-//CHECK: %[[LOAD:.*]] = sitofp i32 %[[ATOMIC_LOAD_TEMP]] to float
-//CHECK: store float %[[LOAD]], ptr %[[Z]], align 4
+//CHECK: store i32 %[[ATOMIC_LOAD_TEMP]], ptr %[[Z]], align 4
omp.atomic.read %1 = %7 : !llvm.ptr, !llvm.ptr, i32
//CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i64, ptr %[[Y]] monotonic, align 4
//CHECK: %[[CAST:.*]] = bitcast i64 %[[ATOMIC_LOAD_TEMP]] to double
-//CHECK: %[[LOAD:.*]] = fptosi double %[[CAST]] to i32
-//CHECK: store i32 %[[LOAD]], ptr %[[W]], align 4
+//CHECK: store double %[[CAST]], ptr %[[W]], align 8
omp.atomic.read %7 = %3 : !llvm.ptr, !llvm.ptr, f64
//CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[Z]] monotonic, align 4
//CHECK: %[[CAST:.*]] = bitcast i32 %[[ATOMIC_LOAD_TEMP]] to float
-//CHECK: %[[LOAD:.*]] = fptosi float %[[CAST]] to i32
-//CHECK: store i32 %[[LOAD]], ptr %[[W]], align 4
+//CHECK: store float %[[CAST]], ptr %[[W]], align 4
omp.atomic.read %7 = %1 : !llvm.ptr, !llvm.ptr, f32
llvm.return
}
More information about the flang-commits
mailing list