[flang-commits] [flang] 9d8e475 - [flang][openacc] Update materialization recipe for private copy in reduction init region

Mon Jul 24 09:33:44 PDT 2023

Author: Valentin Clement
Date: 2023-07-24T09:33:39-07:00
New Revision: 9d8e4759a806dd16fc59901d031ebd03f1da0183

URL: https://github.com/llvm/llvm-project/commit/9d8e4759a806dd16fc59901d031ebd03f1da0183
DIFF: https://github.com/llvm/llvm-project/commit/9d8e4759a806dd16fc59901d031ebd03f1da0183.diff

LOG: [flang][openacc] Update materialization recipe for private copy in reduction init region

Update the code generated in the init region to materialize the private
copy.

Reviewed By: razvanlupusoru

Differential Revision: https://reviews.llvm.org/D155882

Added: 
    

Modified: 
    flang/lib/Lower/OpenACC.cpp
    flang/test/Lower/OpenACC/acc-kernels-loop.f90
    flang/test/Lower/OpenACC/acc-loop.f90
    flang/test/Lower/OpenACC/acc-parallel-loop.f90
    flang/test/Lower/OpenACC/acc-parallel.f90
    flang/test/Lower/OpenACC/acc-reduction.f90
    flang/test/Lower/OpenACC/acc-serial-loop.f90
    flang/test/Lower/OpenACC/acc-serial.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 565ce5bba518db..3d20ef5ea7e52a 100644

--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -703,10 +703,11 @@ bool hasDynamicShape(llvm::SmallVector<mlir::Value> &bounds) {
   return false;
 }
 
-static mlir::Value
-genReductionInitValue(fir::FirOpBuilder &builder, mlir::Location loc,
-                      mlir::Type ty, mlir::acc::ReductionOperator op,
-                      llvm::SmallVector<mlir::Value> &bounds) {
+/// Return a constant with the initial value for the reduction operator and
+/// type combination.
+static mlir::Value getReductionInitValue(fir::FirOpBuilder &builder,
+                                         mlir::Location loc, mlir::Type ty,
+                                         mlir::acc::ReductionOperator op) {
   if (op == mlir::acc::ReductionOperator::AccLand ||
       op == mlir::acc::ReductionOperator::AccLor ||
       op == mlir::acc::ReductionOperator::AccEqv ||
@@ -745,26 +746,48 @@ genReductionInitValue(fir::FirOpBuilder &builder, mlir::Location loc,
     return fir::factory::Complex{builder, loc}.createComplex(
         cmplxTy.getFKind(), realInit, imagInit);
   }
-  if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
-    if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
-      mlir::Type vecTy =
-          mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy());
-      auto shTy = vecTy.cast<mlir::ShapedType>();
-      if (seqTy.getEleTy().isIntOrIndex())
-        return builder.create<mlir::arith::ConstantOp>(
-            loc, vecTy,
-            mlir::DenseElementsAttr::get(
-                shTy,
-                getReductionInitValue<llvm::APInt>(op, seqTy.getEleTy())));
-      if (mlir::isa<mlir::FloatType>(seqTy.getEleTy()))
-        return builder.create<mlir::arith::ConstantOp>(
-            loc, vecTy,
-            mlir::DenseElementsAttr::get(
-                shTy,
-                getReductionInitValue<llvm::APFloat>(op, seqTy.getEleTy())));
+
+  if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty))
+    return getReductionInitValue(builder, loc, seqTy.getEleTy(), op);
+
+  llvm::report_fatal_error("Unsupported OpenACC reduction type");
+}
+
+static mlir::Value genReductionInitRegion(fir::FirOpBuilder &builder,
+                                          mlir::Location loc, mlir::Type ty,
+                                          mlir::acc::ReductionOperator op) {
+  ty = fir::unwrapRefType(ty);
+  mlir::Value initValue = getReductionInitValue(builder, loc, ty, op);
+  if (fir::isa_trivial(ty)) {
+    mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty);
+    builder.create<fir::StoreOp>(loc, builder.createConvert(loc, ty, initValue),
+                                 alloca);
+    return alloca;
+  } else if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(ty)) {
+    if (seqTy.hasDynamicExtents())
+      TODO(loc, "private recipe of array with dynamic extents");
+    if (fir::isa_trivial(seqTy.getEleTy())) {
+      mlir::Value alloca = builder.create<fir::AllocaOp>(loc, seqTy);
+      mlir::Type idxTy = builder.getIndexType();
+      mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
+      llvm::SmallVector<fir::DoLoopOp> loops;
+      llvm::SmallVector<mlir::Value> ivs;
+      for (auto ext : llvm::reverse(seqTy.getShape())) {
+        auto lb = builder.createIntegerConstant(loc, idxTy, 0);
+        auto ub = builder.createIntegerConstant(loc, idxTy, ext - 1);
+        auto step = builder.createIntegerConstant(loc, idxTy, 1);
+        auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
+                                                  /*unordered=*/false);
+        builder.setInsertionPointToStart(loop.getBody());
+        loops.push_back(loop);
+        ivs.push_back(loop.getInductionVar());
+      }
+      auto coord = builder.create<fir::CoordinateOp>(loc, refTy, alloca, ivs);
+      builder.create<fir::StoreOp>(loc, initValue, coord);
+      builder.setInsertionPointAfter(loops[0]);
+      return alloca;
     }
   }
-
   llvm::report_fatal_error("Unsupported OpenACC reduction type");
 }
 
@@ -775,8 +798,16 @@ static mlir::Value genLogicalCombiner(fir::FirOpBuilder &builder,
   mlir::Type i1 = builder.getI1Type();
   mlir::Value v1 = builder.create<fir::ConvertOp>(loc, i1, value1);
   mlir::Value v2 = builder.create<fir::ConvertOp>(loc, i1, value2);
-  mlir::Value add = builder.create<Op>(loc, v1, v2);
-  return builder.create<fir::ConvertOp>(loc, value1.getType(), add);
+  mlir::Value combined = builder.create<Op>(loc, v1, v2);
+  return builder.create<fir::ConvertOp>(loc, value1.getType(), combined);
+}
+
+static mlir::Value loadIfRef(fir::FirOpBuilder &builder, mlir::Location loc,
+                             mlir::Value value) {
+  if (mlir::isa<fir::ReferenceType, fir::PointerType, fir::HeapType>(
+          value.getType()))
+    return builder.create<fir::LoadOp>(loc, value);
+  return value;
 }
 
 static mlir::Value genComparisonCombiner(fir::FirOpBuilder &builder,
@@ -791,45 +822,13 @@ static mlir::Value genComparisonCombiner(fir::FirOpBuilder &builder,
   return builder.create<fir::ConvertOp>(loc, value1.getType(), add);
 }
 
-static mlir::Value genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
-                               mlir::acc::ReductionOperator op, mlir::Type ty,
-                               mlir::Value value1, mlir::Value value2) {
-
-  // Handle combiner on arrays.
-  if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
-    if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
-      if (seqTy.hasDynamicExtents())
-        TODO(loc, "OpenACC reduction on array with dynamic extents");
-      mlir::Type idxTy = builder.getIndexType();
-      mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
-
-      llvm::SmallVector<fir::DoLoopOp> loops;
-      llvm::SmallVector<mlir::Value> ivs;
-      for (auto ext : llvm::reverse(seqTy.getShape())) {
-        auto lb = builder.create<mlir::arith::ConstantOp>(
-            loc, idxTy, builder.getIntegerAttr(idxTy, 0));
-        auto ub = builder.create<mlir::arith::ConstantOp>(
-            loc, idxTy, builder.getIntegerAttr(idxTy, ext - 1));
-        auto step = builder.create<mlir::arith::ConstantOp>(
-            loc, idxTy, builder.getIntegerAttr(idxTy, 1));
-        auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
-                                                  /*unordered=*/false);
-        builder.setInsertionPointToStart(loop.getBody());
-        loops.push_back(loop);
-        ivs.push_back(loop.getInductionVar());
-      }
-      auto addr1 = builder.create<fir::CoordinateOp>(loc, refTy, value1, ivs);
-      auto addr2 = builder.create<fir::CoordinateOp>(loc, refTy, value2, ivs);
-      auto load1 = builder.create<fir::LoadOp>(loc, addr1);
-      auto load2 = builder.create<fir::LoadOp>(loc, addr2);
-      auto combined =
-          genCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2);
-      builder.create<fir::StoreOp>(loc, combined, addr1);
-      builder.setInsertionPointAfter(loops[0]);
-      return value1;
-    }
-  }
-
+static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder,
+                                     mlir::Location loc,
+                                     mlir::acc::ReductionOperator op,
+                                     mlir::Type ty, mlir::Value value1,
+                                     mlir::Value value2) {
+  value1 = loadIfRef(builder, loc, value1);
+  value2 = loadIfRef(builder, loc, value2);
   if (op == mlir::acc::ReductionOperator::AccAdd) {
     if (ty.isIntOrIndex())
       return builder.create<mlir::arith::AddIOp>(loc, value1, value2);
@@ -883,6 +882,43 @@ static mlir::Value genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
   TODO(loc, "reduction operator");
 }
 
+static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
+                        mlir::acc::ReductionOperator op, mlir::Type ty,
+                        mlir::Value value1, mlir::Value value2) {
+  ty = fir::unwrapRefType(ty);
+
+  if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) {
+    if (seqTy.hasDynamicExtents())
+      TODO(loc, "OpenACC reduction on array with dynamic extents");
+    mlir::Type idxTy = builder.getIndexType();
+    mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
+
+    llvm::SmallVector<fir::DoLoopOp> loops;
+    llvm::SmallVector<mlir::Value> ivs;
+    for (auto ext : llvm::reverse(seqTy.getShape())) {
+      auto lb = builder.createIntegerConstant(loc, idxTy, 0);
+      auto ub = builder.createIntegerConstant(loc, idxTy, ext - 1);
+      auto step = builder.createIntegerConstant(loc, idxTy, 1);
+      auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
+                                                /*unordered=*/false);
+      builder.setInsertionPointToStart(loop.getBody());
+      loops.push_back(loop);
+      ivs.push_back(loop.getInductionVar());
+    }
+    auto addr1 = builder.create<fir::CoordinateOp>(loc, refTy, value1, ivs);
+    auto addr2 = builder.create<fir::CoordinateOp>(loc, refTy, value2, ivs);
+    auto load1 = builder.create<fir::LoadOp>(loc, addr1);
+    auto load2 = builder.create<fir::LoadOp>(loc, addr2);
+    mlir::Value res =
+        genScalarCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2);
+    builder.create<fir::StoreOp>(loc, res, addr1);
+    builder.setInsertionPointAfter(loops[0]);
+  } else {
+    mlir::Value res = genScalarCombiner(builder, loc, op, ty, value1, value2);
+    builder.create<fir::StoreOp>(loc, res, value1);
+  }
+}
+
 mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe(
     fir::FirOpBuilder &builder, llvm::StringRef recipeName, mlir::Location loc,
     mlir::Type ty, mlir::acc::ReductionOperator op,
@@ -899,7 +935,7 @@ mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe(
   builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(),
                       {ty}, {loc});
   builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
-  mlir::Value initValue = genReductionInitValue(builder, loc, ty, op, bounds);
+  mlir::Value initValue = genReductionInitRegion(builder, loc, ty, op);
   builder.create<mlir::acc::YieldOp>(loc, initValue);
 
   builder.createBlock(&recipe.getCombinerRegion(),
@@ -907,8 +943,8 @@ mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe(
   builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
   mlir::Value v1 = recipe.getCombinerRegion().front().getArgument(0);
   mlir::Value v2 = recipe.getCombinerRegion().front().getArgument(1);
-  mlir::Value combinedValue = genCombiner(builder, loc, op, ty, v1, v2);
-  builder.create<mlir::acc::YieldOp>(loc, combinedValue);
+  genCombiner(builder, loc, op, ty, v1, v2);
+  builder.create<mlir::acc::YieldOp>(loc, v1);
   builder.restoreInsertionPoint(crtPos);
   return recipe;
 }
@@ -950,9 +986,7 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
     auto op = createDataEntryOp<mlir::acc::ReductionOp>(
         builder, operandLocation, baseAddr, asFortran, bounds,
         /*structured=*/true, mlir::acc::DataClause::acc_reduction, retTy);
-    mlir::Type ty = fir::unwrapRefType(op.getAccPtr().getType());
-    if (!fir::isa_trivial(ty))
-      ty = retTy;
+    mlir::Type ty = op.getAccPtr().getType();
     std::string recipeName = fir::getTypeAsString(
         ty, converter.getKindMap(),
         ("reduction_" + stringifyReductionOperator(mlirOp)).str());

diff  --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
index 36b66213279f5e..05cd5ffe5f692f 100644
--- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
@@ -720,7 +720,7 @@ subroutine acc_kernels_loop
   end do
 
 ! CHECK:      acc.kernels {
-! CHECK:        acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}

diff  --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index bd4d36d8dc0129..eac4fa41af82a3 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -279,7 +279,7 @@ program acc_loop
     reduction_i = 1
   end do
 
-! CHECK:      acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:      acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:        fir.do_loop
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}

diff  --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
index 011c9dc3989250..2e65ddbd36db43 100644
--- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -760,8 +760,8 @@ subroutine acc_parallel_loop
     reduction_i = 1
   end do
 
-! CHECK:      acc.parallel reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
-! CHECK:        acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:      acc.parallel reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}

diff  --git a/flang/test/Lower/OpenACC/acc-parallel.f90 b/flang/test/Lower/OpenACC/acc-parallel.f90
index 1bfbf02505e02e..67e66097ae6ffe 100644
--- a/flang/test/Lower/OpenACC/acc-parallel.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel.f90
@@ -326,7 +326,7 @@ subroutine acc_parallel
 !$acc parallel reduction(+:reduction_r) reduction(*:reduction_i)
 !$acc end parallel
 
-! CHECK:      acc.parallel reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:      acc.parallel reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
 

diff  --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index ae900c096ab185..21cdae750664e1 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -4,8 +4,16 @@
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_10xi32 : !fir.ref<!fir.array<10xi32>> reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<10xi32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<0> : vector<10xi32>
-! CHECK:   acc.yield %[[CST]] : vector<10xi32>
+! CHECK:   %[[INIT:.*]] = arith.constant 0 : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<10xi32>
+! CHECK:   %[[LB:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB:.*]] = arith.constant 9 : index
+! CHECK:   %[[STEP:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+! CHECK:     %[[COORD:.*]] = fir.coordinate_of %0, %[[IV]] : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+! CHECK:     fir.store %[[INIT]] to %[[COORD]] : !fir.ref<i32>
+! CHECK:   }
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<10xi32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<10xi32>>):
 ! CHECK:   %[[LB:.*]] = arith.constant 0 : index
@@ -22,120 +30,177 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<10xi32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_mul_z32 : !fir.complex<4> reduction_operator <mul> init {
-! CHECK: ^bb0(%{{.*}}: !fir.complex<4>):
+! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_z32 : !fir.ref<!fir.complex<4>> reduction_operator <mul> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.complex<4>>):
 ! CHECK:   %[[REAL:.*]] = arith.constant 1.000000e+00 : f32
 ! CHECK:   %[[IMAG:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:   %[[UNDEF:.*]] = fir.undefined !fir.complex<4>
 ! CHECK:   %[[UNDEF1:.*]] = fir.insert_value %[[UNDEF]], %[[REAL]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4>
 ! CHECK:   %[[UNDEF2:.*]] = fir.insert_value %[[UNDEF1]], %[[IMAG]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4>
-! CHECK:   acc.yield %[[UNDEF2]] : !fir.complex<4>
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.complex<4>
+! CHECK:   fir.store %[[UNDEF2]] to %[[ALLOCA]] : !fir.ref<!fir.complex<4>>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.complex<4>>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: !fir.complex<4>, %[[ARG1:.*]]: !fir.complex<4>):
-! CHECK:   %[[COMBINED:.*]] = fir.mulc %[[ARG0]], %[[ARG1]] : !fir.complex<4>
-! CHECK:   acc.yield %[[COMBINED]] : !fir.complex<4>
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.complex<4>>, %[[ARG1:.*]]: !fir.ref<!fir.complex<4>>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.complex<4>>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.complex<4>>
+! CHECK:   %[[COMBINED:.*]] = fir.mulc %[[LOAD0]], %[[LOAD1]] : !fir.complex<4>
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<!fir.complex<4>>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.complex<4>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_add_z32 : !fir.complex<4> reduction_operator <add> init {
-! CHECK: ^bb0(%{{.*}}: !fir.complex<4>):
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_z32 : !fir.ref<!fir.complex<4>> reduction_operator <add> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.complex<4>>):
 ! CHECK:   %[[REAL:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:   %[[IMAG:.*]] = arith.constant 0.000000e+00 : f32
 ! CHECK:   %[[UNDEF:.*]] = fir.undefined !fir.complex<4>
 ! CHECK:   %[[UNDEF1:.*]] = fir.insert_value %[[UNDEF]], %[[REAL]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4>
 ! CHECK:   %[[UNDEF2:.*]] = fir.insert_value %[[UNDEF1]], %[[IMAG]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4>
-! CHECK:   acc.yield %[[UNDEF2]] : !fir.complex<4>
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.complex<4>
+! CHECK:   fir.store %[[UNDEF2]] to %[[ALLOCA]] : !fir.ref<!fir.complex<4>>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.complex<4>>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: !fir.complex<4>, %[[ARG1:.*]]: !fir.complex<4>):
-! CHECK:   %[[COMBINED:.*]] = fir.addc %[[ARG0]], %[[ARG1]] : !fir.complex<4> 
-! CHECK:   acc.yield %[[COMBINED]] : !fir.complex<4>
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.complex<4>>, %[[ARG1:.*]]: !fir.ref<!fir.complex<4>>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.complex<4>>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.complex<4>>
+! CHECK:   %[[COMBINED:.*]] = fir.addc %[[LOAD0]], %[[LOAD1]] : !fir.complex<4>
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<!fir.complex<4>>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.complex<4>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_neqv_l32 : !fir.logical<4> reduction_operator <neqv> init {
-! CHECK: ^bb0(%{{.*}}: !fir.logical<4>):
+! CHECK-LABEL: acc.reduction.recipe @reduction_neqv_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <neqv> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.logical<4>>):
 ! CHECK:   %[[CST:.*]] = arith.constant false
-! CHECK:   acc.yield %[[CST]] : i1
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.logical<4>
+! CHECK:   %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>):
-! CHECK:   %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[NEQV:.*]] = arith.cmpi ne, %[[V1]], %[[V2]] : i1
-! CHECK:   %[[CONV:.*]] = fir.convert %[[NEQV]] : (i1) -> !fir.logical<4>
-! CHECK:   acc.yield %[[CONV]] : !fir.logical<4>
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CMP:.*]] = arith.cmpi ne, %[[CONV0]], %[[CONV1]] : i1
+! CHECK:   %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.logical<4>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_eqv_l32 : !fir.logical<4> reduction_operator <eqv> init {
-! CHECK: ^bb0(%{{.*}}: !fir.logical<4>):
+! CHECK-LABEL: acc.reduction.recipe @reduction_eqv_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <eqv> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.logical<4>>):
 ! CHECK:   %[[CST:.*]] = arith.constant true
-! CHECK:   acc.yield %[[CST]] : i1
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.logical<4>
+! CHECK:   %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>):
-! CHECK:   %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[EQV:.*]] = arith.cmpi eq, %[[V1]], %[[V2]] : i1
-! CHECK:   %[[CONV:.*]] = fir.convert %[[EQV]] : (i1) -> !fir.logical<4>
-! CHECK:   acc.yield %[[CONV]] : !fir.logical<4>
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CMP:.*]] = arith.cmpi eq, %[[CONV0]], %[[CONV1]] : i1
+! CHECK:   %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.logical<4>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_lor_l32 : !fir.logical<4> reduction_operator <lor> init {
-! CHECK: ^bb0(%{{.*}}: !fir.logical<4>):
+! CHECK-LABEL: acc.reduction.recipe @reduction_lor_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <lor> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.logical<4>>):
 ! CHECK:   %[[CST:.*]] = arith.constant false
-! CHECK:   acc.yield %[[CST]] : i1
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.logical<4>
+! CHECK:   %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>):
-! CHECK:   %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[AND:.*]] = arith.ori %[[V1]], %[[V2]] : i1
-! CHECK:   %[[CONV:.*]] = fir.convert %[[AND]] : (i1) -> !fir.logical<4>
-! CHECK:   acc.yield %[[CONV]] : !fir.logical<4>
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CMP:.*]] = arith.ori %[[CONV0]], %[[CONV1]] : i1
+! CHECK:   %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.logical<4>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_land_l32 : !fir.logical<4> reduction_operator <land> init {
-! CHECK: ^bb0(%{{.*}}: !fir.logical<4>):
+! CHECK-LABEL: acc.reduction.recipe @reduction_land_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <land> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.logical<4>>):
 ! CHECK:   %[[CST:.*]] = arith.constant true
-! CHECK:   acc.yield %[[CST]] : i1
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.logical<4>
+! CHECK:   %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.logical<4>>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>):
-! CHECK:   %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1
-! CHECK:   %[[AND:.*]] = arith.andi %[[V1]], %[[V2]] : i1
-! CHECK:   %[[CONV:.*]] = fir.convert %[[AND]] : (i1) -> !fir.logical<4>
-! CHECK:   acc.yield %[[CONV]] : !fir.logical<4>
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.logical<4>>, %[[ARG1:.*]]: !fir.ref<!fir.logical<4>>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<!fir.logical<4>>
+! CHECK:   %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1
+! CHECK:   %[[CMP:.*]] = arith.andi %[[CONV0]], %[[CONV1]] : i1
+! CHECK:   %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4>
+! CHECK:   fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref<!fir.logical<4>>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.logical<4>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_xor_i32 : i32 reduction_operator <xor> init {
-! CHECK: ^bb0(%{{.*}}: i32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_xor_ref_i32 : !fir.ref<i32> reduction_operator <xor> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
 ! CHECK:   %[[CST:.*]] = arith.constant 0 : i32
-! CHECK:   acc.yield %[[CST]] : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca i32
+! CHECK:   fir.store %[[CST]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-! CHECK:   %[[COMBINED:.*]] = arith.xori %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   acc.yield %[[COMBINED]] : i32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:   %[[COMBINED:.*]] = arith.xori %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<i32>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_ior_i32 : i32 reduction_operator <ior> init {
-! CHECK: ^bb0(%{{.*}}: i32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_ior_ref_i32 : !fir.ref<i32> reduction_operator <ior> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
 ! CHECK:   %[[CST:.*]] = arith.constant 0 : i32
-! CHECK:   acc.yield %[[CST]] : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca i32
+! CHECK:   fir.store %[[CST]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-! CHECK:   %[[COMBINED:.*]] = arith.ori %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   acc.yield %[[COMBINED]] : i32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:   %[[COMBINED:.*]] = arith.ori %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<i32>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_iand_i32 : i32 reduction_operator <iand> init {
-! CHECK: ^bb0(%{{.*}}: i32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_iand_ref_i32 : !fir.ref<i32> reduction_operator <iand> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
 ! CHECK:   %[[CST:.*]] = arith.constant -1 : i32
-! CHECK:   acc.yield %[[CST]] : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca i32
+! CHECK:   fir.store %[[CST]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-! CHECK:   %[[COMBINED:.*]] = arith.andi %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   acc.yield %[[COMBINED]] : i32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:   %[[COMBINED:.*]] = arith.andi %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<i32>
 ! CHECK: }
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_100xf32 : !fir.ref<!fir.array<100xf32>> reduction_operator <max> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xf32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<-1.401300e-45> : vector<100xf32>
-! CHECK:   acc.yield %[[CST]] : vector<100xf32>
+! CHECK:   %[[INIT:.*]] = arith.constant -1.401300e-45 : f32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32>
+! CHECK:   %[[LB:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+! CHECK:     %[[COORD:.*]] = fir.coordinate_of %[[ALLOCA]], %[[IV]] : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK:     fir.store %[[INIT]] to %[[COORD]] : !fir.ref<f32>
+! CHECK:   }
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100xf32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xf32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
@@ -153,21 +218,27 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xf32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_max_f32 : f32 reduction_operator <max> init {
-! CHECK: ^bb0(%{{.*}}: f32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_f32 : !fir.ref<f32> reduction_operator <max> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant -1.401300e-45 : f32
-! CHECK:   acc.yield %[[INIT]] : f32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca f32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<f32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32):
-! CHECK:   %[[CMP:.*]] = arith.cmpf ogt, %[[ARG0]], %[[ARG1]] : f32
-! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : f32
-! CHECK:   acc.yield %[[SELECT]] : f32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK:   %[[CMP:.*]] = arith.cmpf ogt, %[[LOAD0]], %[[LOAD1]] : f32
+! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : f32
+! CHECK:   fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<f32>
 ! CHECK: }
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_100x10xi32 : !fir.ref<!fir.array<100x10xi32>> reduction_operator <max> init {
 ! CHECK: ^bb0(%arg0: !fir.ref<!fir.array<100x10xi32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<-2147483648> : vector<100x10xi32>
-! CHECK:   acc.yield %[[CST]] : vector<100x10xi32>
+! CHECK:   %[[INIT:.*]] = arith.constant -2147483648 : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100x10xi32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
@@ -190,21 +261,27 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100x10xi32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_max_i32 : i32 reduction_operator <max> init {
-! CHECK: ^bb0(%arg0: i32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_i32 : !fir.ref<i32> reduction_operator <max> init {
+! CHECK: ^bb0(%arg0: !fir.ref<i32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant -2147483648 : i32
-! CHECK:   acc.yield %[[INIT]] : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca i32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-! CHECK:   %[[CMP:.*]] = arith.cmpi sgt, %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   acc.yield %[[SELECT]] : i32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:   %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<i32>
 ! CHECK: }
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100x10xf32 : !fir.ref<!fir.array<100x10xf32>> reduction_operator <min> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100x10xf32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<3.40282347E+38> : vector<100x10xf32>
-! CHECK:   acc.yield %[[CST]] : vector<100x10xf32>
+! CHECK:   %[[INIT:.*]] = arith.constant 3.40282347E+38 : f32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xf32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100x10xf32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
@@ -227,21 +304,27 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100x10xf32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_min_f32 : f32 reduction_operator <min> init {
-! CHECK: ^bb0(%{{.*}}: f32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_f32 : !fir.ref<f32> reduction_operator <min> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant 3.40282347E+38 : f32
-! CHECK:   acc.yield %[[INIT]] : f32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca f32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<f32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32):
-! CHECK:   %[[CMP:.*]] = arith.cmpf olt, %[[ARG0]], %[[ARG1]] : f32
-! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : f32
-! CHECK:   acc.yield %[[SELECT]] : f32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK:   %[[CMP:.*]] = arith.cmpf olt, %[[LOAD0]], %[[LOAD1]] : f32
+! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : f32
+! CHECK:   fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<f32>
 ! CHECK: }
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <min> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xi32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<2147483647> : vector<100xi32>
-! CHECK:   acc.yield %[[CST]] : vector<100xi32>
+! CHECK:   %[[INIT:.*]] = arith.constant 2147483647 : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100xi32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
@@ -259,31 +342,42 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xi32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_min_i32 : i32 reduction_operator <min> init {
-! CHECK: ^bb0(%arg0: i32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_i32 : !fir.ref<i32> reduction_operator <min> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant 2147483647 : i32
-! CHECK:   acc.yield %[[INIT]] : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca i32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-! CHECK:   %[[CMP:.*]] = arith.cmpi slt, %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   acc.yield %[[SELECT]] : i32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:   %[[CMP:.*]] = arith.cmpi slt, %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<i32>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_mul_f32 : f32 reduction_operator <mul> init {
-! CHECK: ^bb0(%{{.*}}: f32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_f32 : !fir.ref<f32> reduction_operator <mul> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant 1.000000e+00 : f32
-! CHECK:   acc.yield %[[INIT]] : f32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca f32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<f32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32):
-! CHECK:   %[[COMBINED:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] {{.*}} : f32
-! CHECK:   acc.yield %[[COMBINED]] : f32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK:   %[[COMBINED:.*]] = arith.mulf %[[LOAD0]], %[[LOAD1]] fastmath<contract> : f32
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<f32>
 ! CHECK: }
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <mul> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xi32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<1> : vector<100xi32>
-! CHECK:   acc.yield %[[CST]] : vector<100xi32>
+! CHECK:   %[[INIT:.*]] = arith.constant 1 : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100xi32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>>):
 ! CHECK:   %[[LB:.*]] = arith.constant 0 : index
@@ -300,20 +394,26 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xi32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_mul_i32 : i32 reduction_operator <mul> init {
-! CHECK: ^bb0(%{{.*}}: i32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_i32 : !fir.ref<i32> reduction_operator <mul> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant 1 : i32
-! CHECK:   acc.yield %[[INIT]] : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca i32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-! CHECK:   %[[COMBINED:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   acc.yield %[[COMBINED]] : i32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:   %[[COMBINED:.*]] = arith.muli %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<i32>
 ! CHECK: }
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xf32 : !fir.ref<!fir.array<100xf32>> reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xf32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<100xf32>
-! CHECK:   acc.yield %[[CST]] : vector<100xf32>
+! CHECK:   %[[INIT:.*]] = arith.constant 0.000000e+00 : f32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100xf32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xf32>>):
 ! CHECK:   %[[LB:.*]] = arith.constant 0 : index
@@ -330,20 +430,26 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xf32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_add_f32 : f32 reduction_operator <add> init {
-! CHECK: ^bb0(%{{.*}}: f32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_f32 : !fir.ref<f32> reduction_operator <add> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant 0.000000e+00 : f32
-! CHECK:   acc.yield %[[INIT]] : f32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca f32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<f32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32):
-! CHECK:   %[[COMBINED:.*]] = arith.addf %[[ARG0]], %[[ARG1]] {{.*}} : f32
-! CHECK:   acc.yield %[[COMBINED]] : f32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<f32>, %[[ARG1:.*]]: !fir.ref<f32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<f32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<f32>
+! CHECK:   %[[COMBINED:.*]] = arith.addf %[[LOAD0]], %[[LOAD1]] fastmath<contract> : f32
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<f32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<f32>
 ! CHECK: }
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100x10x2xi32 : !fir.ref<!fir.array<100x10x2xi32>> reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100x10x2xi32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<0> : vector<100x10x2xi32>
-! CHECK:   acc.yield %[[CST]] : vector<100x10x2xi32>
+! CHECK:   %[[INIT:.*]] = arith.constant 0 : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10x2xi32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100x10x2xi32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
@@ -372,8 +478,9 @@
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100x10xi32 : !fir.ref<!fir.array<100x10xi32>> reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100x10xi32>>):
-! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<100x10xi32>
-! CHECK: acc.yield %[[CST]] : vector<100x10xi32>
+! CHECK:   %[[INIT:.*]] = arith.constant 0 : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100x10xi32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
@@ -397,8 +504,9 @@
 
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xi32>>):
-! CHECK:   %[[CST:.*]] = arith.constant dense<0> : vector<100xi32>
-! CHECK:   acc.yield %[[CST]] : vector<100xi32>
+! CHECK:   %[[INIT:.*]] = arith.constant 0 : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<!fir.array<100xi32>>
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xi32>>):
 ! CHECK:   %[[LB:.*]] = arith.constant 0 : index
@@ -415,14 +523,19 @@
 ! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xi32>>
 ! CHECK: }
 
-! CHECK-LABEL: acc.reduction.recipe @reduction_add_i32 : i32 reduction_operator <add> init {
-! CHECK: ^bb0(%{{.*}}: i32):
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_i32 : !fir.ref<i32> reduction_operator <add> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
 ! CHECK:   %[[INIT:.*]] = arith.constant 0 : i32
-! CHECK:   acc.yield %[[INIT]] : i32
+! CHECK:   %[[ALLOCA:.*]] = fir.alloca i32
+! CHECK:   fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ALLOCA]] : !fir.ref<i32>
 ! CHECK: } combiner {
-! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-! CHECK:   %[[COMBINED:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32
-! CHECK:   acc.yield %[[COMBINED]] : i32
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<i32>, %[[ARG1:.*]]: !fir.ref<i32>):
+! CHECK:   %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref<i32>
+! CHECK:   %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref<i32>
+! CHECK:   %[[COMBINED:.*]] = arith.addi %[[LOAD0]], %[[LOAD1]] : i32
+! CHECK:   fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref<i32>
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<i32>
 ! CHECK: }
 
 subroutine acc_reduction_add_int(a, b)
@@ -438,7 +551,7 @@ subroutine acc_reduction_add_int(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_add_int(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<i32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"} 
-! CHECK:       acc.loop reduction(@reduction_add_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop reduction(@reduction_add_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_add_int_array_1d(a, b)
   integer :: a(100)
@@ -506,7 +619,7 @@ subroutine acc_reduction_add_float(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_add_float(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<f32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_add_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop reduction(@reduction_add_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_add_float_array_1d(a, b)
   real :: a(100), b(100)
@@ -536,7 +649,7 @@ subroutine acc_reduction_mul_int(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_mul_int(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<i32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"}
-! CHECK:       acc.loop reduction(@reduction_mul_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop reduction(@reduction_mul_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_mul_int_array_1d(a, b)
   integer :: a(100)
@@ -566,7 +679,7 @@ subroutine acc_reduction_mul_float(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_mul_float(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<f32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"} 
-! CHECK:       acc.loop reduction(@reduction_mul_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop reduction(@reduction_mul_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_mul_float_array_1d(a, b)
   real :: a(100), b(100)
@@ -596,7 +709,7 @@ subroutine acc_reduction_min_int(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_min_int(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<i32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"} 
-! CHECK:       acc.loop reduction(@reduction_min_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop reduction(@reduction_min_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_min_int_array_1d(a, b)
   integer :: a(100), b(100)
@@ -626,7 +739,7 @@ subroutine acc_reduction_min_float(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_min_float(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<f32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"} 
-! CHECK:       acc.loop reduction(@reduction_min_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop reduction(@reduction_min_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_min_float_array2d(a, b)
   real :: a(100, 10), b(100, 10)
@@ -659,7 +772,7 @@ subroutine acc_reduction_max_int(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_max_int(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<i32> {fir.bindc_name = "b"})
 ! CHECL:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"} 
-! CHECK:       acc.loop reduction(@reduction_max_i32 -> %[[RED_B]] : !fir.ref<i32>)
+! CHECK:       acc.loop reduction(@reduction_max_ref_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
 subroutine acc_reduction_max_int_array2d(a, b)
   integer :: a(100, 10), b(100, 10)
@@ -691,7 +804,7 @@ subroutine acc_reduction_max_float(a, b)
 ! CHECK-LABEL: func.func @_QPacc_reduction_max_float(
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<f32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"} 
-! CHECK:       acc.loop reduction(@reduction_max_f32 -> %[[RED_B]] : !fir.ref<f32>)
+! CHECK:       acc.loop reduction(@reduction_max_ref_f32 -> %[[RED_B]] : !fir.ref<f32>)
 
 subroutine acc_reduction_max_float_array1d(a, b)
   real :: a(100), b(100)
@@ -716,7 +829,7 @@ subroutine acc_reduction_iand()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_iand()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>)   -> !fir.ref<i32> {name = "i"}
-! CHECK: acc.parallel   reduction(@reduction_iand_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.parallel   reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref<i32>)
 
 subroutine acc_reduction_ior()
   integer :: i
@@ -726,7 +839,7 @@ subroutine acc_reduction_ior()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_ior()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>)   -> !fir.ref<i32> {name = "i"}
-! CHECK: acc.parallel reduction(@reduction_ior_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref<i32>)
 
 subroutine acc_reduction_ieor()
   integer :: i
@@ -736,7 +849,7 @@ subroutine acc_reduction_ieor()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_ieor()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "i"}
-! CHECK: acc.parallel reduction(@reduction_xor_i32 -> %[[RED]] : !fir.ref<i32>)
+! CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref<i32>)
 
 subroutine acc_reduction_and()
   logical :: l
@@ -746,7 +859,7 @@ subroutine acc_reduction_and()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_and()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%0 : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
-! CHECK: acc.parallel reduction(@reduction_land_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
 
 subroutine acc_reduction_or()
   logical :: l
@@ -756,7 +869,7 @@ subroutine acc_reduction_or()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_or()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
-! CHECK: acc.parallel reduction(@reduction_lor_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
 
 subroutine acc_reduction_eqv()
   logical :: l
@@ -766,7 +879,7 @@ subroutine acc_reduction_eqv()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_eqv()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
-! CHECK: acc.parallel reduction(@reduction_eqv_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
 
 subroutine acc_reduction_neqv()
   logical :: l
@@ -776,7 +889,7 @@ subroutine acc_reduction_neqv()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_neqv()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.logical<4>>) -> !fir.ref<!fir.logical<4>> {name = "l"}
-! CHECK: acc.parallel reduction(@reduction_neqv_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
+! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref<!fir.logical<4>>)
 
 subroutine acc_reduction_add_cmplx()
   complex :: c
@@ -786,7 +899,7 @@ subroutine acc_reduction_add_cmplx()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.complex<4>>) -> !fir.ref<!fir.complex<4>> {name = "c"}
-! CHECK: acc.parallel reduction(@reduction_add_z32 -> %[[RED]] : !fir.ref<!fir.complex<4>>)
+! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref<!fir.complex<4>>)
 
 subroutine acc_reduction_mul_cmplx()
   complex :: c
@@ -796,7 +909,7 @@ subroutine acc_reduction_mul_cmplx()
 
 ! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx()
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.complex<4>>) -> !fir.ref<!fir.complex<4>> {name = "c"}
-! CHECK: acc.parallel reduction(@reduction_mul_z32 -> %[[RED]] : !fir.ref<!fir.complex<4>>)
+! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref<!fir.complex<4>>)
 
 subroutine acc_reduction_add_alloc()
   integer, allocatable :: i
@@ -810,7 +923,7 @@ subroutine acc_reduction_add_alloc()
 ! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref<!fir.box<!fir.heap<i32>>>
 ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap<i32>) -> !fir.heap<i32> {name = "i"}
-! CHECK: acc.parallel reduction(@reduction_add_i32 -> %[[RED]] : !fir.heap<i32>)
+! CHECK: acc.parallel reduction(@reduction_add_heap_i32 -> %[[RED]] : !fir.heap<i32>)
 
 subroutine acc_reduction_add_pointer(i)
   integer, pointer :: i
@@ -823,7 +936,7 @@ subroutine acc_reduction_add_pointer(i)
 ! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref<!fir.box<!fir.ptr<i32>>>
 ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<i32>>) -> !fir.ptr<i32>
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr<i32>) -> !fir.ptr<i32> {name = "i"}
-! CHECK: acc.parallel reduction(@reduction_add_i32 -> %[[RED]] : !fir.ptr<i32>)
+! CHECK: acc.parallel reduction(@reduction_add_ptr_i32 -> %[[RED]] : !fir.ptr<i32>)
 
 subroutine acc_reduction_add_static_slice(a)
   integer :: a(100)

diff  --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90
index bb31d770573070..a3fe901a73ce49 100644
--- a/flang/test/Lower/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90
@@ -667,8 +667,8 @@ subroutine acc_serial_loop
     reduction_i = 1
   end do
 
-! CHECK:      acc.serial reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
-! CHECK:        acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:      acc.serial reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}

diff  --git a/flang/test/Lower/OpenACC/acc-serial.f90 b/flang/test/Lower/OpenACC/acc-serial.f90
index ffaef0769dbd64..7589f659d14b45 100644
--- a/flang/test/Lower/OpenACC/acc-serial.f90
+++ b/flang/test/Lower/OpenACC/acc-serial.f90
@@ -262,7 +262,7 @@ subroutine acc_serial
 !$acc serial reduction(+:reduction_r) reduction(*:reduction_i)
 !$acc end serial
 
-! CHECK:      acc.serial reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK:      acc.serial reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}