[clang] 9c1085c - [Clang][OpenMP] Add the support for floating-point variables for specific atomic clauses
Shilei Tian via cfe-commits
cfe-commits at lists.llvm.org
Tue May 3 08:31:00 PDT 2022
Author: Shilei Tian
Date: 2022-05-03T11:30:54-04:00
New Revision: 9c1085c7e20bdd7c4a487f50313ebeeb2b6683b8
URL: https://github.com/llvm/llvm-project/commit/9c1085c7e20bdd7c4a487f50313ebeeb2b6683b8
DIFF: https://github.com/llvm/llvm-project/commit/9c1085c7e20bdd7c4a487f50313ebeeb2b6683b8.diff
LOG: [Clang][OpenMP] Add the support for floating-point variables for specific atomic clauses
Currently, when `atomic update` is used with floating-point variables and the
operation is add or sub, `cmpxchg` is emitted instead of `atomicrmw`, as
shown in [1]. In fact, llvm-svn: 351850 added support for FP `atomicrmw`
operations about three years ago. This patch adds that support to OpenMP as well.
[1] https://godbolt.org/z/M7b4ba9na
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D124724
Added:
Modified:
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/test/OpenMP/atomic_capture_codegen.cpp
clang/test/OpenMP/atomic_update_codegen.cpp
clang/test/OpenMP/for_reduction_codegen.cpp
clang/test/OpenMP/parallel_reduction_codegen.cpp
clang/test/OpenMP/reduction_implicit_map.cpp
clang/test/OpenMP/sections_reduction_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index fc8156a78d6a7..5e75e8884bb49 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5838,25 +5838,38 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
// Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
// expression is simple and atomic is allowed for the given type for the
// target platform.
- if (BO == BO_Comma || !Update.isScalar() ||
- !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
+ if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
(!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
(Update.getScalarVal()->getType() !=
X.getAddress(CGF).getElementType())) ||
- !X.getAddress(CGF).getElementType()->isIntegerTy() ||
!Context.getTargetInfo().hasBuiltinAtomic(
Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
return std::make_pair(false, RValue::get(nullptr));
+ auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
+ if (T->isIntegerTy())
+ return true;
+
+ if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
+ return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
+
+ return false;
+ };
+
+ if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
+ !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
+ return std::make_pair(false, RValue::get(nullptr));
+
+ bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
llvm::AtomicRMWInst::BinOp RMWOp;
switch (BO) {
case BO_Add:
- RMWOp = llvm::AtomicRMWInst::Add;
+ RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
break;
case BO_Sub:
if (!IsXLHSInRHSPart)
return std::make_pair(false, RValue::get(nullptr));
- RMWOp = llvm::AtomicRMWInst::Sub;
+ RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
break;
case BO_And:
RMWOp = llvm::AtomicRMWInst::And;
@@ -5914,9 +5927,13 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
}
llvm::Value *UpdateVal = Update.getScalarVal();
if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
- UpdateVal = CGF.Builder.CreateIntCast(
- IC, X.getAddress(CGF).getElementType(),
- X.getType()->hasSignedIntegerRepresentation());
+ if (IsInteger)
+ UpdateVal = CGF.Builder.CreateIntCast(
+ IC, X.getAddress(CGF).getElementType(),
+ X.getType()->hasSignedIntegerRepresentation());
+ else
+ UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
+ X.getAddress(CGF).getElementType());
}
llvm::Value *Res =
CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp
index c5f45a39232c0..95509df9ba935 100644
--- a/clang/test/OpenMP/atomic_capture_codegen.cpp
+++ b/clang/test/OpenMP/atomic_capture_codegen.cpp
@@ -216,20 +216,8 @@ int main(void) {
#pragma omp atomic capture
llv = ullx |= ullv;
// CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
-// CHECK: [[TEMP_I:%.+]] = bitcast float* [[TEMP:%.+]] to i32*
-// CHECK: [[OLD:%.+]] = bitcast i32 [[EXPECTED]] to float
+// CHECK: [[OLD:%.+]] = atomicrmw fadd float* @{{.+}}, float [[EXPR]] monotonic, align 4
// CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
-// CHECK: store float [[ADD]], float* [[TEMP]],
-// CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
-// CHECK: [[OLD_X:%.+]] = extractvalue { i32, i1 } [[RES]], 0
-// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
-// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
-// CHECK: [[EXIT]]
// CHECK: [[CAST:%.+]] = fpext float [[ADD]] to double
// CHECK: store double [[CAST]], double* @{{.+}},
#pragma omp atomic capture
diff --git a/clang/test/OpenMP/atomic_update_codegen.cpp b/clang/test/OpenMP/atomic_update_codegen.cpp
index 398c2fdc29f32..4cc301bee337d 100644
--- a/clang/test/OpenMP/atomic_update_codegen.cpp
+++ b/clang/test/OpenMP/atomic_update_codegen.cpp
@@ -82,7 +82,7 @@ float2 float2x;
register int rix __asm__("esp");
int main(void) {
-// CHECK-NOT: atomicrmw
+// CHECK: atomicrmw fadd double* @{{.+}}, double 1.000000e+00 monotonic, align 8
#pragma omp atomic
++dv;
// CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic, align 1
@@ -192,20 +192,7 @@ int main(void) {
#pragma omp atomic
ullx |= ullv;
// CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[OLD:%.+]] = load atomic i32, i32* bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
-// CHECK: br label %[[CONT:.+]]
-// CHECK: [[CONT]]
-// CHECK: [[EXPECTED:%.+]] = phi i32 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
-// CHECK: [[BITCAST:%.+]] = bitcast float* [[TEMP:%.+]] to i32*
-// CHECK: [[OLD:%.+]] = bitcast i32 [[EXPECTED]] to float
-// CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
-// CHECK: store float [[ADD]], float* [[TEMP]],
-// CHECK: [[DESIRED:%.+]] = load i32, i32* [[BITCAST]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
-// CHECK: [[PREV:%.+]] = extractvalue { i32, i1 } [[RES]], 0
-// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
-// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
-// CHECK: [[EXIT]]
+// CHECK: atomicrmw fadd float* @{{.+}}, float [[EXPR]] monotonic, align 4
#pragma omp atomic update
fx = fx + fv;
// CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp
index 4820be8eaa490..0fa00b31f3472 100644
--- a/clang/test/OpenMP/for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen.cpp
@@ -698,11 +698,9 @@ int main() {
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK1-NEXT: [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4
// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[_TMP22:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[REF_TMP25:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK1-NEXT: [[ATOMIC_TEMP35:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[_TMP36:%.*]] = alloca float, align 4
+// CHECK1-NEXT: [[_TMP31:%.*]] = alloca float, align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -843,77 +841,59 @@ int main() {
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP44:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP45]] monotonic, align 4
-// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP46:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP54:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: [[TMP47:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK1-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP46]] to float
-// CHECK1-NEXT: store float [[TMP48]], float* [[_TMP22]], align 4
-// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[_TMP22]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK1-NEXT: [[ADD23:%.*]] = fadd float [[TMP49]], [[TMP50]]
-// CHECK1-NEXT: store float [[ADD23]], float* [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP47]], align 4
-// CHECK1-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT: [[TMP53:%.*]] = cmpxchg i32* [[TMP52]], i32 [[TMP46]], i32 [[TMP51]] monotonic monotonic, align 4
-// CHECK1-NEXT: [[TMP54]] = extractvalue { i32, i1 } [[TMP53]], 0
-// CHECK1-NEXT: [[TMP55:%.*]] = extractvalue { i32, i1 } [[TMP53]], 1
-// CHECK1-NEXT: br i1 [[TMP55]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK1: atomic_exit:
+// CHECK1-NEXT: [[TMP45:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP44]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[CALL24:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
-// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
-// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S* [[CALL24]] to i8*
-// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false)
+// CHECK1-NEXT: [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
+// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
+// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[CALL22]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false)
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[CALL26:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK1-NEXT: [[TOBOOL27:%.*]] = fcmp une float [[CALL26]], 0.000000e+00
-// CHECK1-NEXT: br i1 [[TOBOOL27]], label [[LAND_RHS28:%.*]], label [[LAND_END31:%.*]]
-// CHECK1: land.rhs28:
-// CHECK1-NEXT: [[CALL29:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
-// CHECK1-NEXT: [[TOBOOL30:%.*]] = fcmp une float [[CALL29]], 0.000000e+00
-// CHECK1-NEXT: br label [[LAND_END31]]
-// CHECK1: land.end31:
-// CHECK1-NEXT: [[TMP58:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL30]], [[LAND_RHS28]] ]
-// CHECK1-NEXT: [[CONV32:%.*]] = uitofp i1 [[TMP58]] to float
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]], float noundef [[CONV32]])
-// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK1-NEXT: [[TMP60:%.*]] = bitcast %struct.S* [[REF_TMP25]] to i8*
-// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false)
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]]) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK1-NEXT: [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00
+// CHECK1-NEXT: br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]]
+// CHECK1: land.rhs26:
+// CHECK1-NEXT: [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
+// CHECK1-NEXT: [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00
+// CHECK1-NEXT: br label [[LAND_END29]]
+// CHECK1: land.end29:
+// CHECK1-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ]
+// CHECK1-NEXT: [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float
+// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]])
+// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[REF_TMP23]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false)
+// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]]
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[TMP61:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK1-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT: [[ATOMIC_LOAD33:%.*]] = load atomic i32, i32* [[TMP62]] monotonic, align 4
-// CHECK1-NEXT: br label [[ATOMIC_CONT34:%.*]]
-// CHECK1: atomic_cont34:
-// CHECK1-NEXT: [[TMP63:%.*]] = phi i32 [ [[ATOMIC_LOAD33]], [[LAND_END31]] ], [ [[TMP73:%.*]], [[COND_END40:%.*]] ]
-// CHECK1-NEXT: [[TMP64:%.*]] = bitcast float* [[ATOMIC_TEMP35]] to i32*
-// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP63]] to float
-// CHECK1-NEXT: store float [[TMP65]], float* [[_TMP36]], align 4
-// CHECK1-NEXT: [[TMP66:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK1-NEXT: [[TMP67:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK1-NEXT: [[CMP37:%.*]] = fcmp olt float [[TMP66]], [[TMP67]]
-// CHECK1-NEXT: br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE39:%.*]]
-// CHECK1: cond.true38:
-// CHECK1-NEXT: [[TMP68:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK1-NEXT: br label [[COND_END40]]
-// CHECK1: cond.false39:
-// CHECK1-NEXT: [[TMP69:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK1-NEXT: br label [[COND_END40]]
-// CHECK1: cond.end40:
-// CHECK1-NEXT: [[COND41:%.*]] = phi float [ [[TMP68]], [[COND_TRUE38]] ], [ [[TMP69]], [[COND_FALSE39]] ]
-// CHECK1-NEXT: store float [[COND41]], float* [[ATOMIC_TEMP35]], align 4
-// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP64]], align 4
-// CHECK1-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT: [[TMP72:%.*]] = cmpxchg i32* [[TMP71]], i32 [[TMP63]], i32 [[TMP70]] monotonic monotonic, align 4
-// CHECK1-NEXT: [[TMP73]] = extractvalue { i32, i1 } [[TMP72]], 0
-// CHECK1-NEXT: [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1
-// CHECK1-NEXT: br i1 [[TMP74]], label [[ATOMIC_EXIT42:%.*]], label [[ATOMIC_CONT34]]
-// CHECK1: atomic_exit42:
+// CHECK1-NEXT: [[TMP51:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP52]] monotonic, align 4
+// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
+// CHECK1: atomic_cont:
+// CHECK1-NEXT: [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ]
+// CHECK1-NEXT: [[TMP54:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK1-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float
+// CHECK1-NEXT: store float [[TMP55]], float* [[_TMP31]], align 4
+// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK1-NEXT: [[CMP32:%.*]] = fcmp olt float [[TMP56]], [[TMP57]]
+// CHECK1-NEXT: br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]]
+// CHECK1: cond.true33:
+// CHECK1-NEXT: [[TMP58:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK1-NEXT: br label [[COND_END35]]
+// CHECK1: cond.false34:
+// CHECK1-NEXT: [[TMP59:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK1-NEXT: br label [[COND_END35]]
+// CHECK1: cond.end35:
+// CHECK1-NEXT: [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ]
+// CHECK1-NEXT: store float [[COND36]], float* [[ATOMIC_TEMP]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP54]], align 4
+// CHECK1-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT: [[TMP62:%.*]] = cmpxchg i32* [[TMP61]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4
+// CHECK1-NEXT: [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0
+// CHECK1-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1
+// CHECK1-NEXT: br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK1: atomic_exit:
// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
@@ -4633,11 +4613,9 @@ int main() {
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK2-NEXT: [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4
// CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
-// CHECK2-NEXT: [[_TMP22:%.*]] = alloca float, align 4
-// CHECK2-NEXT: [[REF_TMP25:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK2-NEXT: [[ATOMIC_TEMP35:%.*]] = alloca float, align 4
-// CHECK2-NEXT: [[_TMP36:%.*]] = alloca float, align 4
+// CHECK2-NEXT: [[_TMP31:%.*]] = alloca float, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -4778,77 +4756,59 @@ int main() {
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP44:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK2-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP45]] monotonic, align 4
-// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK2: atomic_cont:
-// CHECK2-NEXT: [[TMP46:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP54:%.*]], [[ATOMIC_CONT]] ]
-// CHECK2-NEXT: [[TMP47:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK2-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP46]] to float
-// CHECK2-NEXT: store float [[TMP48]], float* [[_TMP22]], align 4
-// CHECK2-NEXT: [[TMP49:%.*]] = load float, float* [[_TMP22]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR3]], align 4
-// CHECK2-NEXT: [[ADD23:%.*]] = fadd float [[TMP49]], [[TMP50]]
-// CHECK2-NEXT: store float [[ADD23]], float* [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP47]], align 4
-// CHECK2-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT: [[TMP53:%.*]] = cmpxchg i32* [[TMP52]], i32 [[TMP46]], i32 [[TMP51]] monotonic monotonic, align 4
-// CHECK2-NEXT: [[TMP54]] = extractvalue { i32, i1 } [[TMP53]], 0
-// CHECK2-NEXT: [[TMP55:%.*]] = extractvalue { i32, i1 } [[TMP53]], 1
-// CHECK2-NEXT: br i1 [[TMP55]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK2: atomic_exit:
+// CHECK2-NEXT: [[TMP45:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP44]] monotonic, align 4
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[CALL24:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
-// CHECK2-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
-// CHECK2-NEXT: [[TMP57:%.*]] = bitcast %struct.S* [[CALL24]] to i8*
-// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false)
+// CHECK2-NEXT: [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]])
+// CHECK2-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP7]] to i8*
+// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[CALL22]] to i8*
+// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false)
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[CALL26:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK2-NEXT: [[TOBOOL27:%.*]] = fcmp une float [[CALL26]], 0.000000e+00
-// CHECK2-NEXT: br i1 [[TOBOOL27]], label [[LAND_RHS28:%.*]], label [[LAND_END31:%.*]]
-// CHECK2: land.rhs28:
-// CHECK2-NEXT: [[CALL29:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
-// CHECK2-NEXT: [[TOBOOL30:%.*]] = fcmp une float [[CALL29]], 0.000000e+00
-// CHECK2-NEXT: br label [[LAND_END31]]
-// CHECK2: land.end31:
-// CHECK2-NEXT: [[TMP58:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL30]], [[LAND_RHS28]] ]
-// CHECK2-NEXT: [[CONV32:%.*]] = uitofp i1 [[TMP58]] to float
-// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]], float noundef [[CONV32]])
-// CHECK2-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK2-NEXT: [[TMP60:%.*]] = bitcast %struct.S* [[REF_TMP25]] to i8*
-// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false)
-// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP25]]) #[[ATTR5]]
+// CHECK2-NEXT: [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK2-NEXT: [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00
+// CHECK2-NEXT: br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]]
+// CHECK2: land.rhs26:
+// CHECK2-NEXT: [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]])
+// CHECK2-NEXT: [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00
+// CHECK2-NEXT: br label [[LAND_END29]]
+// CHECK2: land.end29:
+// CHECK2-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ]
+// CHECK2-NEXT: [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float
+// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]])
+// CHECK2-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK2-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[REF_TMP23]] to i8*
+// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false)
+// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]]
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[TMP61:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK2-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT: [[ATOMIC_LOAD33:%.*]] = load atomic i32, i32* [[TMP62]] monotonic, align 4
-// CHECK2-NEXT: br label [[ATOMIC_CONT34:%.*]]
-// CHECK2: atomic_cont34:
-// CHECK2-NEXT: [[TMP63:%.*]] = phi i32 [ [[ATOMIC_LOAD33]], [[LAND_END31]] ], [ [[TMP73:%.*]], [[COND_END40:%.*]] ]
-// CHECK2-NEXT: [[TMP64:%.*]] = bitcast float* [[ATOMIC_TEMP35]] to i32*
-// CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP63]] to float
-// CHECK2-NEXT: store float [[TMP65]], float* [[_TMP36]], align 4
-// CHECK2-NEXT: [[TMP66:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK2-NEXT: [[TMP67:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK2-NEXT: [[CMP37:%.*]] = fcmp olt float [[TMP66]], [[TMP67]]
-// CHECK2-NEXT: br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE39:%.*]]
-// CHECK2: cond.true38:
-// CHECK2-NEXT: [[TMP68:%.*]] = load float, float* [[_TMP36]], align 4
-// CHECK2-NEXT: br label [[COND_END40]]
-// CHECK2: cond.false39:
-// CHECK2-NEXT: [[TMP69:%.*]] = load float, float* [[T_VAR17]], align 4
-// CHECK2-NEXT: br label [[COND_END40]]
-// CHECK2: cond.end40:
-// CHECK2-NEXT: [[COND41:%.*]] = phi float [ [[TMP68]], [[COND_TRUE38]] ], [ [[TMP69]], [[COND_FALSE39]] ]
-// CHECK2-NEXT: store float [[COND41]], float* [[ATOMIC_TEMP35]], align 4
-// CHECK2-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP64]], align 4
-// CHECK2-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT: [[TMP72:%.*]] = cmpxchg i32* [[TMP71]], i32 [[TMP63]], i32 [[TMP70]] monotonic monotonic, align 4
-// CHECK2-NEXT: [[TMP73]] = extractvalue { i32, i1 } [[TMP72]], 0
-// CHECK2-NEXT: [[TMP74:%.*]] = extractvalue { i32, i1 } [[TMP72]], 1
-// CHECK2-NEXT: br i1 [[TMP74]], label [[ATOMIC_EXIT42:%.*]], label [[ATOMIC_CONT34]]
-// CHECK2: atomic_exit42:
+// CHECK2-NEXT: [[TMP51:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK2-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP52]] monotonic, align 4
+// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
+// CHECK2: atomic_cont:
+// CHECK2-NEXT: [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ]
+// CHECK2-NEXT: [[TMP54:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK2-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float
+// CHECK2-NEXT: store float [[TMP55]], float* [[_TMP31]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK2-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK2-NEXT: [[CMP32:%.*]] = fcmp olt float [[TMP56]], [[TMP57]]
+// CHECK2-NEXT: br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]]
+// CHECK2: cond.true33:
+// CHECK2-NEXT: [[TMP58:%.*]] = load float, float* [[_TMP31]], align 4
+// CHECK2-NEXT: br label [[COND_END35]]
+// CHECK2: cond.false34:
+// CHECK2-NEXT: [[TMP59:%.*]] = load float, float* [[T_VAR17]], align 4
+// CHECK2-NEXT: br label [[COND_END35]]
+// CHECK2: cond.end35:
+// CHECK2-NEXT: [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ]
+// CHECK2-NEXT: store float [[COND36]], float* [[ATOMIC_TEMP]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP54]], align 4
+// CHECK2-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT: [[TMP62:%.*]] = cmpxchg i32* [[TMP61]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4
+// CHECK2-NEXT: [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0
+// CHECK2-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1
+// CHECK2-NEXT: br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK2: atomic_exit:
// CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
@@ -8425,10 +8385,6 @@ int main() {
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
-// CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK3-NEXT: [[_TMP7:%.*]] = alloca double, align 8
-// CHECK3-NEXT: [[ATOMIC_TEMP11:%.*]] = alloca double, align 8
-// CHECK3-NEXT: [[_TMP12:%.*]] = alloca double, align 8
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** @g1, align 8
@@ -8514,43 +8470,9 @@ int main() {
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK3: atomic_cont:
-// CHECK3-NEXT: [[TMP26:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP33:%.*]], [[ATOMIC_CONT]] ]
-// CHECK3-NEXT: [[TMP27:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i64 [[TMP26]] to double
-// CHECK3-NEXT: store double [[TMP28]], double* [[_TMP7]], align 8
-// CHECK3-NEXT: [[TMP29:%.*]] = load double, double* [[_TMP7]], align 8
-// CHECK3-NEXT: [[TMP30:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[TMP29]], [[TMP30]]
-// CHECK3-NEXT: store double [[ADD8]], double* [[ATOMIC_TEMP]], align 8
-// CHECK3-NEXT: [[TMP31:%.*]] = load i64, i64* [[TMP27]], align 8
-// CHECK3-NEXT: [[TMP32:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP26]], i64 [[TMP31]] monotonic monotonic, align 8
-// CHECK3-NEXT: [[TMP33]] = extractvalue { i64, i1 } [[TMP32]], 0
-// CHECK3-NEXT: [[TMP34:%.*]] = extractvalue { i64, i1 } [[TMP32]], 1
-// CHECK3-NEXT: br i1 [[TMP34]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK3: atomic_exit:
-// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[G1]], align 8
-// CHECK3-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK3-NEXT: [[ATOMIC_LOAD9:%.*]] = load atomic i64, i64* [[TMP36]] monotonic, align 8
-// CHECK3-NEXT: br label [[ATOMIC_CONT10:%.*]]
-// CHECK3: atomic_cont10:
-// CHECK3-NEXT: [[TMP37:%.*]] = phi i64 [ [[ATOMIC_LOAD9]], [[ATOMIC_EXIT]] ], [ [[TMP45:%.*]], [[ATOMIC_CONT10]] ]
-// CHECK3-NEXT: [[TMP38:%.*]] = bitcast double* [[ATOMIC_TEMP11]] to i64*
-// CHECK3-NEXT: [[TMP39:%.*]] = bitcast i64 [[TMP37]] to double
-// CHECK3-NEXT: store double [[TMP39]], double* [[_TMP12]], align 8
-// CHECK3-NEXT: [[TMP40:%.*]] = load double, double* [[_TMP12]], align 8
-// CHECK3-NEXT: [[TMP41:%.*]] = load double, double* [[G1]], align 8
-// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP40]], [[TMP41]]
-// CHECK3-NEXT: store double [[ADD13]], double* [[ATOMIC_TEMP11]], align 8
-// CHECK3-NEXT: [[TMP42:%.*]] = load i64, i64* [[TMP38]], align 8
-// CHECK3-NEXT: [[TMP43:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK3-NEXT: [[TMP44:%.*]] = cmpxchg i64* [[TMP43]], i64 [[TMP37]], i64 [[TMP42]] monotonic monotonic, align 8
-// CHECK3-NEXT: [[TMP45]] = extractvalue { i64, i1 } [[TMP44]], 0
-// CHECK3-NEXT: [[TMP46:%.*]] = extractvalue { i64, i1 } [[TMP44]], 1
-// CHECK3-NEXT: br i1 [[TMP46]], label [[ATOMIC_EXIT14:%.*]], label [[ATOMIC_CONT10]]
-// CHECK3: atomic_exit14:
+// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw fadd double* @g, double [[TMP25]] monotonic, align 8
+// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[G1]], align 8
+// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP27]] monotonic, align 8
// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
@@ -8633,10 +8555,6 @@ int main() {
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, align 8
// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
-// CHECK4-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK4-NEXT: [[_TMP8:%.*]] = alloca double, align 8
-// CHECK4-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca double, align 8
-// CHECK4-NEXT: [[_TMP13:%.*]] = alloca double, align 8
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: [[TMP0:%.*]] = load double*, double** @g1, align 8
@@ -8739,43 +8657,9 @@ int main() {
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.case2:
// CHECK4-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK4-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK4: atomic_cont:
-// CHECK4-NEXT: [[TMP30:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP37:%.*]], [[ATOMIC_CONT]] ]
-// CHECK4-NEXT: [[TMP31:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK4-NEXT: [[TMP32:%.*]] = bitcast i64 [[TMP30]] to double
-// CHECK4-NEXT: store double [[TMP32]], double* [[_TMP8]], align 8
-// CHECK4-NEXT: [[TMP33:%.*]] = load double, double* [[_TMP8]], align 8
-// CHECK4-NEXT: [[TMP34:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT: [[ADD9:%.*]] = fadd double [[TMP33]], [[TMP34]]
-// CHECK4-NEXT: store double [[ADD9]], double* [[ATOMIC_TEMP]], align 8
-// CHECK4-NEXT: [[TMP35:%.*]] = load i64, i64* [[TMP31]], align 8
-// CHECK4-NEXT: [[TMP36:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP30]], i64 [[TMP35]] monotonic monotonic, align 8
-// CHECK4-NEXT: [[TMP37]] = extractvalue { i64, i1 } [[TMP36]], 0
-// CHECK4-NEXT: [[TMP38:%.*]] = extractvalue { i64, i1 } [[TMP36]], 1
-// CHECK4-NEXT: br i1 [[TMP38]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK4: atomic_exit:
-// CHECK4-NEXT: [[TMP39:%.*]] = load double, double* [[G1]], align 8
-// CHECK4-NEXT: [[TMP40:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK4-NEXT: [[ATOMIC_LOAD10:%.*]] = load atomic i64, i64* [[TMP40]] monotonic, align 8
-// CHECK4-NEXT: br label [[ATOMIC_CONT11:%.*]]
-// CHECK4: atomic_cont11:
-// CHECK4-NEXT: [[TMP41:%.*]] = phi i64 [ [[ATOMIC_LOAD10]], [[ATOMIC_EXIT]] ], [ [[TMP49:%.*]], [[ATOMIC_CONT11]] ]
-// CHECK4-NEXT: [[TMP42:%.*]] = bitcast double* [[ATOMIC_TEMP12]] to i64*
-// CHECK4-NEXT: [[TMP43:%.*]] = bitcast i64 [[TMP41]] to double
-// CHECK4-NEXT: store double [[TMP43]], double* [[_TMP13]], align 8
-// CHECK4-NEXT: [[TMP44:%.*]] = load double, double* [[_TMP13]], align 8
-// CHECK4-NEXT: [[TMP45:%.*]] = load double, double* [[G1]], align 8
-// CHECK4-NEXT: [[ADD14:%.*]] = fadd double [[TMP44]], [[TMP45]]
-// CHECK4-NEXT: store double [[ADD14]], double* [[ATOMIC_TEMP12]], align 8
-// CHECK4-NEXT: [[TMP46:%.*]] = load i64, i64* [[TMP42]], align 8
-// CHECK4-NEXT: [[TMP47:%.*]] = bitcast double* [[TMP1]] to i64*
-// CHECK4-NEXT: [[TMP48:%.*]] = cmpxchg i64* [[TMP47]], i64 [[TMP41]], i64 [[TMP46]] monotonic monotonic, align 8
-// CHECK4-NEXT: [[TMP49]] = extractvalue { i64, i1 } [[TMP48]], 0
-// CHECK4-NEXT: [[TMP50:%.*]] = extractvalue { i64, i1 } [[TMP48]], 1
-// CHECK4-NEXT: br i1 [[TMP50]], label [[ATOMIC_EXIT15:%.*]], label [[ATOMIC_CONT11]]
-// CHECK4: atomic_exit15:
+// CHECK4-NEXT: [[TMP30:%.*]] = atomicrmw fadd double* @g, double [[TMP29]] monotonic, align 8
+// CHECK4-NEXT: [[TMP31:%.*]] = load double, double* [[G1]], align 8
+// CHECK4-NEXT: [[TMP32:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP31]] monotonic, align 8
// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.default:
diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp
index 3432947670c2f..f40e9081f42ff 100644
--- a/clang/test/OpenMP/parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp
@@ -605,11 +605,9 @@ int main() {
// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK1-NEXT: [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4
// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK1-NEXT: [[ATOMIC_TEMP23:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[_TMP24:%.*]] = alloca float, align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8
@@ -697,77 +695,59 @@ int main() {
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP32:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP33]] monotonic, align 4
-// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP34:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP42:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: [[TMP35:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP34]] to float
-// CHECK1-NEXT: store float [[TMP36]], float* [[TMP]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[TMP]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD11:%.*]] = fadd float [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: store float [[ADD11]], float* [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP35]], align 4
-// CHECK1-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK1-NEXT: [[TMP41:%.*]] = cmpxchg i32* [[TMP40]], i32 [[TMP34]], i32 [[TMP39]] monotonic monotonic, align 4
-// CHECK1-NEXT: [[TMP42]] = extractvalue { i32, i1 } [[TMP41]], 0
-// CHECK1-NEXT: [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP41]], 1
-// CHECK1-NEXT: br i1 [[TMP43]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK1: atomic_exit:
+// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw fadd float* [[TMP1]], float [[TMP32]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
-// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[CALL12]] to i8*
-// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false)
+// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
+// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL11]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false)
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
-// CHECK1-NEXT: [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00
-// CHECK1-NEXT: br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]]
-// CHECK1: land.rhs16:
-// CHECK1-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK1-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
-// CHECK1-NEXT: br label [[LAND_END19]]
-// CHECK1: land.end19:
-// CHECK1-NEXT: [[TMP46:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ]
-// CHECK1-NEXT: [[CONV20:%.*]] = uitofp i1 [[TMP46]] to float
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], float noundef [[CONV20]])
-// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
-// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[REF_TMP13]] to i8*
-// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false)
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
+// CHECK1-NEXT: [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00
+// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]]
+// CHECK1: land.rhs15:
+// CHECK1-NEXT: [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK1-NEXT: [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00
+// CHECK1-NEXT: br label [[LAND_END18]]
+// CHECK1: land.end18:
+// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ]
+// CHECK1-NEXT: [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float
+// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]])
+// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
+// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP12]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false)
+// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]]
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK1-NEXT: [[ATOMIC_LOAD21:%.*]] = load atomic i32, i32* [[TMP50]] monotonic, align 4
-// CHECK1-NEXT: br label [[ATOMIC_CONT22:%.*]]
-// CHECK1: atomic_cont22:
-// CHECK1-NEXT: [[TMP51:%.*]] = phi i32 [ [[ATOMIC_LOAD21]], [[LAND_END19]] ], [ [[TMP61:%.*]], [[COND_END28:%.*]] ]
-// CHECK1-NEXT: [[TMP52:%.*]] = bitcast float* [[ATOMIC_TEMP23]] to i32*
-// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i32 [[TMP51]] to float
-// CHECK1-NEXT: store float [[TMP53]], float* [[_TMP24]], align 4
-// CHECK1-NEXT: [[TMP54:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK1-NEXT: [[TMP55:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT: [[CMP25:%.*]] = fcmp olt float [[TMP54]], [[TMP55]]
-// CHECK1-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]]
-// CHECK1: cond.true26:
-// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK1-NEXT: br label [[COND_END28]]
-// CHECK1: cond.false27:
-// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT: br label [[COND_END28]]
-// CHECK1: cond.end28:
-// CHECK1-NEXT: [[COND29:%.*]] = phi float [ [[TMP56]], [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ]
-// CHECK1-NEXT: store float [[COND29]], float* [[ATOMIC_TEMP23]], align 4
-// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP52]], align 4
-// CHECK1-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK1-NEXT: [[TMP60:%.*]] = cmpxchg i32* [[TMP59]], i32 [[TMP51]], i32 [[TMP58]] monotonic monotonic, align 4
-// CHECK1-NEXT: [[TMP61]] = extractvalue { i32, i1 } [[TMP60]], 0
-// CHECK1-NEXT: [[TMP62:%.*]] = extractvalue { i32, i1 } [[TMP60]], 1
-// CHECK1-NEXT: br i1 [[TMP62]], label [[ATOMIC_EXIT30:%.*]], label [[ATOMIC_CONT22]]
-// CHECK1: atomic_exit30:
+// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP40]] monotonic, align 4
+// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
+// CHECK1: atomic_cont:
+// CHECK1-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ]
+// CHECK1-NEXT: [[TMP42:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float
+// CHECK1-NEXT: store float [[TMP43]], float* [[TMP]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT: [[TMP45:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT: [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]]
+// CHECK1-NEXT: br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]]
+// CHECK1: cond.true21:
+// CHECK1-NEXT: [[TMP46:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT: br label [[COND_END23]]
+// CHECK1: cond.false22:
+// CHECK1-NEXT: [[TMP47:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT: br label [[COND_END23]]
+// CHECK1: cond.end23:
+// CHECK1-NEXT: [[COND24:%.*]] = phi float [ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ]
+// CHECK1-NEXT: store float [[COND24]], float* [[ATOMIC_TEMP]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP42]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK1-NEXT: [[TMP50:%.*]] = cmpxchg i32* [[TMP49]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4
+// CHECK1-NEXT: [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0
+// CHECK1-NEXT: [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1
+// CHECK1-NEXT: br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK1: atomic_exit:
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]]
@@ -2005,11 +1985,9 @@ int main() {
// CHECK2-NEXT: [[T_VAR15:%.*]] = alloca float, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK2-NEXT: [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4
// CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca float, align 4
-// CHECK2-NEXT: [[REF_TMP13:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK2-NEXT: [[ATOMIC_TEMP23:%.*]] = alloca float, align 4
-// CHECK2-NEXT: [[_TMP24:%.*]] = alloca float, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8
@@ -2097,77 +2075,59 @@ int main() {
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP32:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP33]] monotonic, align 4
-// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK2: atomic_cont:
-// CHECK2-NEXT: [[TMP34:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP42:%.*]], [[ATOMIC_CONT]] ]
-// CHECK2-NEXT: [[TMP35:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK2-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP34]] to float
-// CHECK2-NEXT: store float [[TMP36]], float* [[TMP]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load float, float* [[TMP]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT: [[ADD11:%.*]] = fadd float [[TMP37]], [[TMP38]]
-// CHECK2-NEXT: store float [[ADD11]], float* [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP35]], align 4
-// CHECK2-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP1]] to i32*
-// CHECK2-NEXT: [[TMP41:%.*]] = cmpxchg i32* [[TMP40]], i32 [[TMP34]], i32 [[TMP39]] monotonic monotonic, align 4
-// CHECK2-NEXT: [[TMP42]] = extractvalue { i32, i1 } [[TMP41]], 0
-// CHECK2-NEXT: [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP41]], 1
-// CHECK2-NEXT: br i1 [[TMP43]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK2: atomic_exit:
+// CHECK2-NEXT: [[TMP33:%.*]] = atomicrmw fadd float* [[TMP1]], float [[TMP32]] monotonic, align 4
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK2-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
-// CHECK2-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[CALL12]] to i8*
-// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false)
+// CHECK2-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK2-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8*
+// CHECK2-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL11]] to i8*
+// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false)
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
-// CHECK2-NEXT: [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00
-// CHECK2-NEXT: br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]]
-// CHECK2: land.rhs16:
-// CHECK2-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK2-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
-// CHECK2-NEXT: br label [[LAND_END19]]
-// CHECK2: land.end19:
-// CHECK2-NEXT: [[TMP46:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ]
-// CHECK2-NEXT: [[CONV20:%.*]] = uitofp i1 [[TMP46]] to float
-// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], float noundef [[CONV20]])
-// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
-// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[REF_TMP13]] to i8*
-// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false)
-// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR5]]
+// CHECK2-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]])
+// CHECK2-NEXT: [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00
+// CHECK2-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]]
+// CHECK2: land.rhs15:
+// CHECK2-NEXT: [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK2-NEXT: [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00
+// CHECK2-NEXT: br label [[LAND_END18]]
+// CHECK2: land.end18:
+// CHECK2-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ]
+// CHECK2-NEXT: [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float
+// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]])
+// CHECK2-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP4]] to i8*
+// CHECK2-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP12]] to i8*
+// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false)
+// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]]
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK2-NEXT: [[ATOMIC_LOAD21:%.*]] = load atomic i32, i32* [[TMP50]] monotonic, align 4
-// CHECK2-NEXT: br label [[ATOMIC_CONT22:%.*]]
-// CHECK2: atomic_cont22:
-// CHECK2-NEXT: [[TMP51:%.*]] = phi i32 [ [[ATOMIC_LOAD21]], [[LAND_END19]] ], [ [[TMP61:%.*]], [[COND_END28:%.*]] ]
-// CHECK2-NEXT: [[TMP52:%.*]] = bitcast float* [[ATOMIC_TEMP23]] to i32*
-// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i32 [[TMP51]] to float
-// CHECK2-NEXT: store float [[TMP53]], float* [[_TMP24]], align 4
-// CHECK2-NEXT: [[TMP54:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK2-NEXT: [[TMP55:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT: [[CMP25:%.*]] = fcmp olt float [[TMP54]], [[TMP55]]
-// CHECK2-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]]
-// CHECK2: cond.true26:
-// CHECK2-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP24]], align 4
-// CHECK2-NEXT: br label [[COND_END28]]
-// CHECK2: cond.false27:
-// CHECK2-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT: br label [[COND_END28]]
-// CHECK2: cond.end28:
-// CHECK2-NEXT: [[COND29:%.*]] = phi float [ [[TMP56]], [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ]
-// CHECK2-NEXT: store float [[COND29]], float* [[ATOMIC_TEMP23]], align 4
-// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP52]], align 4
-// CHECK2-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP5]] to i32*
-// CHECK2-NEXT: [[TMP60:%.*]] = cmpxchg i32* [[TMP59]], i32 [[TMP51]], i32 [[TMP58]] monotonic monotonic, align 4
-// CHECK2-NEXT: [[TMP61]] = extractvalue { i32, i1 } [[TMP60]], 0
-// CHECK2-NEXT: [[TMP62:%.*]] = extractvalue { i32, i1 } [[TMP60]], 1
-// CHECK2-NEXT: br i1 [[TMP62]], label [[ATOMIC_EXIT30:%.*]], label [[ATOMIC_CONT22]]
-// CHECK2: atomic_exit30:
+// CHECK2-NEXT: [[TMP39:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP40]] monotonic, align 4
+// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
+// CHECK2: atomic_cont:
+// CHECK2-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ]
+// CHECK2-NEXT: [[TMP42:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK2-NEXT: [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float
+// CHECK2-NEXT: store float [[TMP43]], float* [[TMP]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT: [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]]
+// CHECK2-NEXT: br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]]
+// CHECK2: cond.true21:
+// CHECK2-NEXT: [[TMP46:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT: br label [[COND_END23]]
+// CHECK2: cond.false22:
+// CHECK2-NEXT: [[TMP47:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT: br label [[COND_END23]]
+// CHECK2: cond.end23:
+// CHECK2-NEXT: [[COND24:%.*]] = phi float [ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ]
+// CHECK2-NEXT: store float [[COND24]], float* [[ATOMIC_TEMP]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP42]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP5]] to i32*
+// CHECK2-NEXT: [[TMP50:%.*]] = cmpxchg i32* [[TMP49]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4
+// CHECK2-NEXT: [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0
+// CHECK2-NEXT: [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1
+// CHECK2-NEXT: br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK2: atomic_exit:
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]]
diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index cae2ef39816e0..8fb66752f434f 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -502,8 +502,6 @@ int main()
// CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
-// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[_TMP30:%.*]] = alloca double, align 8
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
@@ -668,39 +666,21 @@ int main()
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP59]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
// CHECK1: omp.arraycpy.body27:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ]
// CHECK1-NEXT: [[TMP60:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
-// CHECK1-NEXT: [[TMP61:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64*
-// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* [[TMP61]] monotonic, align 8
-// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP62:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY27]] ], [ [[TMP70:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: [[TMP63:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i64 [[TMP62]] to double
-// CHECK1-NEXT: store double [[TMP64]], double* [[_TMP30]], align 8
-// CHECK1-NEXT: [[TMP65:%.*]] = load double, double* [[_TMP30]], align 8
-// CHECK1-NEXT: [[TMP66:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
-// CHECK1-NEXT: [[ADD31:%.*]] = fadd double [[TMP65]], [[TMP66]]
-// CHECK1-NEXT: store double [[ADD31]], double* [[ATOMIC_TEMP]], align 8
-// CHECK1-NEXT: [[TMP67:%.*]] = load i64, i64* [[TMP63]], align 8
-// CHECK1-NEXT: [[TMP68:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64*
-// CHECK1-NEXT: [[TMP69:%.*]] = cmpxchg i64* [[TMP68]], i64 [[TMP62]], i64 [[TMP67]] monotonic monotonic, align 8
-// CHECK1-NEXT: [[TMP70]] = extractvalue { i64, i1 } [[TMP69]], 0
-// CHECK1-NEXT: [[TMP71:%.*]] = extractvalue { i64, i1 } [[TMP69]], 1
-// CHECK1-NEXT: br i1 [[TMP71]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
-// CHECK1: atomic_exit:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP59]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY27]]
-// CHECK1: omp.arraycpy.done35:
+// CHECK1-NEXT: [[TMP61:%.*]] = atomicrmw fadd double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP60]] monotonic, align 8
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP59]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]]
+// CHECK1: omp.arraycpy.done33:
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: [[TMP72:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP72]])
+// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP62]])
// CHECK1-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp
index f2e636e8aa0d5..7759b0900f2e9 100644
--- a/clang/test/OpenMP/sections_reduction_codegen.cpp
+++ b/clang/test/OpenMP/sections_reduction_codegen.cpp
@@ -268,11 +268,9 @@ int main() {
// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4
// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[REF_TMP17:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK1-NEXT: [[ATOMIC_TEMP27:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[_TMP28:%.*]] = alloca float, align 4
// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -401,77 +399,59 @@ int main() {
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP43:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP44]] monotonic, align 4
-// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP45:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP53:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP45]] to float
-// CHECK1-NEXT: store float [[TMP47]], float* [[TMP]], align 4
-// CHECK1-NEXT: [[TMP48:%.*]] = load float, float* [[TMP]], align 4
-// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK1-NEXT: [[ADD15:%.*]] = fadd float [[TMP48]], [[TMP49]]
-// CHECK1-NEXT: store float [[ADD15]], float* [[ATOMIC_TEMP]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP46]], align 4
-// CHECK1-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK1-NEXT: [[TMP52:%.*]] = cmpxchg i32* [[TMP51]], i32 [[TMP45]], i32 [[TMP50]] monotonic monotonic, align 4
-// CHECK1-NEXT: [[TMP53]] = extractvalue { i32, i1 } [[TMP52]], 0
-// CHECK1-NEXT: [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1
-// CHECK1-NEXT: br i1 [[TMP54]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK1: atomic_exit:
+// CHECK1-NEXT: [[TMP44:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP43]] monotonic, align 4
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
-// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[CALL16]] to i8*
-// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i64 4, i1 false)
+// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
+// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false)
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[CALL18:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK1-NEXT: [[TOBOOL19:%.*]] = fcmp une float [[CALL18]], 0.000000e+00
-// CHECK1-NEXT: br i1 [[TOBOOL19]], label [[LAND_RHS20:%.*]], label [[LAND_END23:%.*]]
-// CHECK1: land.rhs20:
-// CHECK1-NEXT: [[CALL21:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK1-NEXT: [[TOBOOL22:%.*]] = fcmp une float [[CALL21]], 0.000000e+00
-// CHECK1-NEXT: br label [[LAND_END23]]
-// CHECK1: land.end23:
-// CHECK1-NEXT: [[TMP57:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL22]], [[LAND_RHS20]] ]
-// CHECK1-NEXT: [[CONV24:%.*]] = uitofp i1 [[TMP57]] to float
-// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]], float noundef [[CONV24]])
-// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[REF_TMP17]] to i8*
-// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP58]], i8* align 4 [[TMP59]], i64 4, i1 false)
-// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]]) #[[ATTR4]]
+// CHECK1-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK1-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
+// CHECK1-NEXT: br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]]
+// CHECK1: land.rhs19:
+// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00
+// CHECK1-NEXT: br label [[LAND_END22]]
+// CHECK1: land.end22:
+// CHECK1-NEXT: [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ]
+// CHECK1-NEXT: [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float
+// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]])
+// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[REF_TMP16]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false)
+// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK1-NEXT: [[TMP60:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT: [[ATOMIC_LOAD25:%.*]] = load atomic i32, i32* [[TMP61]] monotonic, align 4
-// CHECK1-NEXT: br label [[ATOMIC_CONT26:%.*]]
-// CHECK1: atomic_cont26:
-// CHECK1-NEXT: [[TMP62:%.*]] = phi i32 [ [[ATOMIC_LOAD25]], [[LAND_END23]] ], [ [[TMP72:%.*]], [[COND_END32:%.*]] ]
-// CHECK1-NEXT: [[TMP63:%.*]] = bitcast float* [[ATOMIC_TEMP27]] to i32*
-// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32 [[TMP62]] to float
-// CHECK1-NEXT: store float [[TMP64]], float* [[_TMP28]], align 4
-// CHECK1-NEXT: [[TMP65:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK1-NEXT: [[TMP66:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT: [[CMP29:%.*]] = fcmp olt float [[TMP65]], [[TMP66]]
-// CHECK1-NEXT: br i1 [[CMP29]], label [[COND_TRUE30:%.*]], label [[COND_FALSE31:%.*]]
-// CHECK1: cond.true30:
-// CHECK1-NEXT: [[TMP67:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK1-NEXT: br label [[COND_END32]]
-// CHECK1: cond.false31:
-// CHECK1-NEXT: [[TMP68:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK1-NEXT: br label [[COND_END32]]
-// CHECK1: cond.end32:
-// CHECK1-NEXT: [[COND33:%.*]] = phi float [ [[TMP67]], [[COND_TRUE30]] ], [ [[TMP68]], [[COND_FALSE31]] ]
-// CHECK1-NEXT: store float [[COND33]], float* [[ATOMIC_TEMP27]], align 4
-// CHECK1-NEXT: [[TMP69:%.*]] = load i32, i32* [[TMP63]], align 4
-// CHECK1-NEXT: [[TMP70:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK1-NEXT: [[TMP71:%.*]] = cmpxchg i32* [[TMP70]], i32 [[TMP62]], i32 [[TMP69]] monotonic monotonic, align 4
-// CHECK1-NEXT: [[TMP72]] = extractvalue { i32, i1 } [[TMP71]], 0
-// CHECK1-NEXT: [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP71]], 1
-// CHECK1-NEXT: br i1 [[TMP73]], label [[ATOMIC_EXIT34:%.*]], label [[ATOMIC_CONT26]]
-// CHECK1: atomic_exit34:
+// CHECK1-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP51]] monotonic, align 4
+// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
+// CHECK1: atomic_cont:
+// CHECK1-NEXT: [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ]
+// CHECK1-NEXT: [[TMP53:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float
+// CHECK1-NEXT: store float [[TMP54]], float* [[TMP]], align 4
+// CHECK1-NEXT: [[TMP55:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT: [[CMP24:%.*]] = fcmp olt float [[TMP55]], [[TMP56]]
+// CHECK1-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]]
+// CHECK1: cond.true25:
+// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[TMP]], align 4
+// CHECK1-NEXT: br label [[COND_END27]]
+// CHECK1: cond.false26:
+// CHECK1-NEXT: [[TMP58:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK1-NEXT: br label [[COND_END27]]
+// CHECK1: cond.end27:
+// CHECK1-NEXT: [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ]
+// CHECK1-NEXT: store float [[COND28]], float* [[ATOMIC_TEMP]], align 4
+// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP53]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK1-NEXT: [[TMP61:%.*]] = cmpxchg i32* [[TMP60]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4
+// CHECK1-NEXT: [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0
+// CHECK1-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1
+// CHECK1-NEXT: br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK1: atomic_exit:
// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
@@ -1100,11 +1080,9 @@ int main() {
// CHECK2-NEXT: [[T_VAR15:%.*]] = alloca float, align 4
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK2-NEXT: [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4
// CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca float, align 4
-// CHECK2-NEXT: [[REF_TMP17:%.*]] = alloca [[STRUCT_S]], align 4
-// CHECK2-NEXT: [[ATOMIC_TEMP27:%.*]] = alloca float, align 4
-// CHECK2-NEXT: [[_TMP28:%.*]] = alloca float, align 4
// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8
@@ -1233,77 +1211,59 @@ int main() {
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.case2:
// CHECK2-NEXT: [[TMP43:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT: [[TMP44:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP44]] monotonic, align 4
-// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK2: atomic_cont:
-// CHECK2-NEXT: [[TMP45:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP53:%.*]], [[ATOMIC_CONT]] ]
-// CHECK2-NEXT: [[TMP46:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
-// CHECK2-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP45]] to float
-// CHECK2-NEXT: store float [[TMP47]], float* [[TMP]], align 4
-// CHECK2-NEXT: [[TMP48:%.*]] = load float, float* [[TMP]], align 4
-// CHECK2-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR2]], align 4
-// CHECK2-NEXT: [[ADD15:%.*]] = fadd float [[TMP48]], [[TMP49]]
-// CHECK2-NEXT: store float [[ADD15]], float* [[ATOMIC_TEMP]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP46]], align 4
-// CHECK2-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP0]] to i32*
-// CHECK2-NEXT: [[TMP52:%.*]] = cmpxchg i32* [[TMP51]], i32 [[TMP45]], i32 [[TMP50]] monotonic monotonic, align 4
-// CHECK2-NEXT: [[TMP53]] = extractvalue { i32, i1 } [[TMP52]], 0
-// CHECK2-NEXT: [[TMP54:%.*]] = extractvalue { i32, i1 } [[TMP52]], 1
-// CHECK2-NEXT: br i1 [[TMP54]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK2: atomic_exit:
+// CHECK2-NEXT: [[TMP44:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP43]] monotonic, align 4
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
-// CHECK2-NEXT: [[TMP55:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
-// CHECK2-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[CALL16]] to i8*
-// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i64 4, i1 false)
+// CHECK2-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]])
+// CHECK2-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP1]] to i8*
+// CHECK2-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8*
+// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false)
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[CALL18:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
-// CHECK2-NEXT: [[TOBOOL19:%.*]] = fcmp une float [[CALL18]], 0.000000e+00
-// CHECK2-NEXT: br i1 [[TOBOOL19]], label [[LAND_RHS20:%.*]], label [[LAND_END23:%.*]]
-// CHECK2: land.rhs20:
-// CHECK2-NEXT: [[CALL21:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
-// CHECK2-NEXT: [[TOBOOL22:%.*]] = fcmp une float [[CALL21]], 0.000000e+00
-// CHECK2-NEXT: br label [[LAND_END23]]
-// CHECK2: land.end23:
-// CHECK2-NEXT: [[TMP57:%.*]] = phi i1 [ false, [[ATOMIC_EXIT]] ], [ [[TOBOOL22]], [[LAND_RHS20]] ]
-// CHECK2-NEXT: [[CONV24:%.*]] = uitofp i1 [[TMP57]] to float
-// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]], float noundef [[CONV24]])
-// CHECK2-NEXT: [[TMP58:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
-// CHECK2-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[REF_TMP17]] to i8*
-// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP58]], i8* align 4 [[TMP59]], i64 4, i1 false)
-// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP17]]) #[[ATTR4]]
+// CHECK2-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]])
+// CHECK2-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00
+// CHECK2-NEXT: br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]]
+// CHECK2: land.rhs19:
+// CHECK2-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]])
+// CHECK2-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00
+// CHECK2-NEXT: br label [[LAND_END22]]
+// CHECK2: land.end22:
+// CHECK2-NEXT: [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ]
+// CHECK2-NEXT: [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float
+// CHECK2-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]])
+// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP2]] to i8*
+// CHECK2-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[REF_TMP16]] to i8*
+// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false)
+// CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
-// CHECK2-NEXT: [[TMP60:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT: [[ATOMIC_LOAD25:%.*]] = load atomic i32, i32* [[TMP61]] monotonic, align 4
-// CHECK2-NEXT: br label [[ATOMIC_CONT26:%.*]]
-// CHECK2: atomic_cont26:
-// CHECK2-NEXT: [[TMP62:%.*]] = phi i32 [ [[ATOMIC_LOAD25]], [[LAND_END23]] ], [ [[TMP72:%.*]], [[COND_END32:%.*]] ]
-// CHECK2-NEXT: [[TMP63:%.*]] = bitcast float* [[ATOMIC_TEMP27]] to i32*
-// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32 [[TMP62]] to float
-// CHECK2-NEXT: store float [[TMP64]], float* [[_TMP28]], align 4
-// CHECK2-NEXT: [[TMP65:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK2-NEXT: [[TMP66:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT: [[CMP29:%.*]] = fcmp olt float [[TMP65]], [[TMP66]]
-// CHECK2-NEXT: br i1 [[CMP29]], label [[COND_TRUE30:%.*]], label [[COND_FALSE31:%.*]]
-// CHECK2: cond.true30:
-// CHECK2-NEXT: [[TMP67:%.*]] = load float, float* [[_TMP28]], align 4
-// CHECK2-NEXT: br label [[COND_END32]]
-// CHECK2: cond.false31:
-// CHECK2-NEXT: [[TMP68:%.*]] = load float, float* [[T_VAR15]], align 4
-// CHECK2-NEXT: br label [[COND_END32]]
-// CHECK2: cond.end32:
-// CHECK2-NEXT: [[COND33:%.*]] = phi float [ [[TMP67]], [[COND_TRUE30]] ], [ [[TMP68]], [[COND_FALSE31]] ]
-// CHECK2-NEXT: store float [[COND33]], float* [[ATOMIC_TEMP27]], align 4
-// CHECK2-NEXT: [[TMP69:%.*]] = load i32, i32* [[TMP63]], align 4
-// CHECK2-NEXT: [[TMP70:%.*]] = bitcast float* [[TMP3]] to i32*
-// CHECK2-NEXT: [[TMP71:%.*]] = cmpxchg i32* [[TMP70]], i32 [[TMP62]], i32 [[TMP69]] monotonic monotonic, align 4
-// CHECK2-NEXT: [[TMP72]] = extractvalue { i32, i1 } [[TMP71]], 0
-// CHECK2-NEXT: [[TMP73:%.*]] = extractvalue { i32, i1 } [[TMP71]], 1
-// CHECK2-NEXT: br i1 [[TMP73]], label [[ATOMIC_EXIT34:%.*]], label [[ATOMIC_CONT26]]
-// CHECK2: atomic_exit34:
+// CHECK2-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP51]] monotonic, align 4
+// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]]
+// CHECK2: atomic_cont:
+// CHECK2-NEXT: [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ]
+// CHECK2-NEXT: [[TMP53:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32*
+// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float
+// CHECK2-NEXT: store float [[TMP54]], float* [[TMP]], align 4
+// CHECK2-NEXT: [[TMP55:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT: [[CMP24:%.*]] = fcmp olt float [[TMP55]], [[TMP56]]
+// CHECK2-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]]
+// CHECK2: cond.true25:
+// CHECK2-NEXT: [[TMP57:%.*]] = load float, float* [[TMP]], align 4
+// CHECK2-NEXT: br label [[COND_END27]]
+// CHECK2: cond.false26:
+// CHECK2-NEXT: [[TMP58:%.*]] = load float, float* [[T_VAR15]], align 4
+// CHECK2-NEXT: br label [[COND_END27]]
+// CHECK2: cond.end27:
+// CHECK2-NEXT: [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ]
+// CHECK2-NEXT: store float [[COND28]], float* [[ATOMIC_TEMP]], align 4
+// CHECK2-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP53]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = bitcast float* [[TMP3]] to i32*
+// CHECK2-NEXT: [[TMP61:%.*]] = cmpxchg i32* [[TMP60]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4
+// CHECK2-NEXT: [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0
+// CHECK2-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1
+// CHECK2-NEXT: br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
+// CHECK2: atomic_exit:
// CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2: .omp.reduction.default:
@@ -1868,8 +1828,6 @@ int main() {
// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8
// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK3-NEXT: [[TMP:%.*]] = alloca double, align 8
// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
@@ -1933,23 +1891,7 @@ int main() {
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
// CHECK3-NEXT: [[TMP17:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK3: atomic_cont:
-// CHECK3-NEXT: [[TMP18:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[ATOMIC_CONT]] ]
-// CHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i64 [[TMP18]] to double
-// CHECK3-NEXT: store double [[TMP20]], double* [[TMP]], align 8
-// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[TMP]], align 8
-// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[G]], align 8
-// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP21]], [[TMP22]]
-// CHECK3-NEXT: store double [[ADD2]], double* [[ATOMIC_TEMP]], align 8
-// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[TMP19]], align 8
-// CHECK3-NEXT: [[TMP24:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP18]], i64 [[TMP23]] monotonic monotonic, align 8
-// CHECK3-NEXT: [[TMP25]] = extractvalue { i64, i1 } [[TMP24]], 0
-// CHECK3-NEXT: [[TMP26:%.*]] = extractvalue { i64, i1 } [[TMP24]], 1
-// CHECK3-NEXT: br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK3: atomic_exit:
+// CHECK3-NEXT: [[TMP18:%.*]] = atomicrmw fadd double* @g, double [[TMP17]] monotonic, align 8
// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
@@ -2017,8 +1959,6 @@ int main() {
// CHECK4-NEXT: [[G:%.*]] = alloca double, align 8
// CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>, align 8
// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK4-NEXT: [[ATOMIC_TEMP:%.*]] = alloca double, align 8
-// CHECK4-NEXT: [[TMP:%.*]] = alloca double, align 8
// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4
@@ -2099,23 +2039,7 @@ int main() {
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.case2:
// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* bitcast (double* @g to i64*) monotonic, align 8
-// CHECK4-NEXT: br label [[ATOMIC_CONT:%.*]]
-// CHECK4: atomic_cont:
-// CHECK4-NEXT: [[TMP23:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[ATOMIC_CONT]] ]
-// CHECK4-NEXT: [[TMP24:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
-// CHECK4-NEXT: [[TMP25:%.*]] = bitcast i64 [[TMP23]] to double
-// CHECK4-NEXT: store double [[TMP25]], double* [[TMP]], align 8
-// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* [[TMP]], align 8
-// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[G]], align 8
-// CHECK4-NEXT: [[ADD2:%.*]] = fadd double [[TMP26]], [[TMP27]]
-// CHECK4-NEXT: store double [[ADD2]], double* [[ATOMIC_TEMP]], align 8
-// CHECK4-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP24]], align 8
-// CHECK4-NEXT: [[TMP29:%.*]] = cmpxchg i64* bitcast (double* @g to i64*), i64 [[TMP23]], i64 [[TMP28]] monotonic monotonic, align 8
-// CHECK4-NEXT: [[TMP30]] = extractvalue { i64, i1 } [[TMP29]], 0
-// CHECK4-NEXT: [[TMP31:%.*]] = extractvalue { i64, i1 } [[TMP29]], 1
-// CHECK4-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]]
-// CHECK4: atomic_exit:
+// CHECK4-NEXT: [[TMP23:%.*]] = atomicrmw fadd double* @g, double [[TMP22]] monotonic, align 8
// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.default:
More information about the cfe-commits
mailing list