[clang] f17b1fb - [Clang][CodeGen] Optimised LLVM IR for atomic increments/decrements on floats (#89362)

via cfe-commits cfe-commits at lists.llvm.org
Thu May 2 02:42:39 PDT 2024


Author: Krishna Narayanan
Date: 2024-05-02T10:42:34+01:00
New Revision: f17b1fb6673e9ed6a7d9180230586113bb99748c

URL: https://github.com/llvm/llvm-project/commit/f17b1fb6673e9ed6a7d9180230586113bb99748c
DIFF: https://github.com/llvm/llvm-project/commit/f17b1fb6673e9ed6a7d9180230586113bb99748c.diff

LOG: [Clang][CodeGen] Optimised LLVM IR for atomic increments/decrements on floats (#89362)

Fixes #53079

Added: 
    clang/test/CodeGen/X86/x86-atomic-float.c

Modified: 
    clang/lib/CodeGen/CGExprScalar.cpp
    clang/test/CodeGen/X86/x86-atomic-long_double.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index af48e8d2b839e0..d84531959b50b6 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2801,6 +2801,19 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                   llvm::AtomicOrdering::SequentiallyConsistent);
       return isPre ? Builder.CreateBinOp(op, old, amt) : old;
     }
+    // Special case for atomic increment/decrement on floats
+    if (type->isFloatingType()) {
+      llvm::AtomicRMWInst::BinOp aop =
+          isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
+      llvm::Instruction::BinaryOps op =
+          isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
+      llvm::Value *amt = llvm::ConstantFP::get(
+          VMContext, llvm::APFloat(static_cast<float>(1.0)));
+      llvm::Value *old =
+          Builder.CreateAtomicRMW(aop, LV.getAddress(CGF), amt,
+                                  llvm::AtomicOrdering::SequentiallyConsistent);
+      return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+    }
     value = EmitLoadOfLValue(LV, E->getExprLoc());
     input = value;
     // For every other atomic operation, we need to emit a load-op-cmpxchg loop

diff  --git a/clang/test/CodeGen/X86/x86-atomic-float.c b/clang/test/CodeGen/X86/x86-atomic-float.c
new file mode 100644
index 00000000000000..a7a5083b92effa
--- /dev/null
+++ b/clang/test/CodeGen/X86/x86-atomic-float.c
@@ -0,0 +1,69 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefixes=CHECK,CHECK64 %s
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefixes=CHECK,CHECK32 %s
+
+
+// CHECK-LABEL: define dso_local i32 @test_int_inc(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw add ptr @test_int_inc.n, i32 1 seq_cst, align 4
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int test_int_inc()
+{
+    static _Atomic int n;
+    return n++;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_post_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_post_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_float_post_inc()
+{
+    static _Atomic float n;
+    return n++;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_post_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_post_dc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float test_float_post_dc()
+{
+    static _Atomic float n;
+    return n--;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_pre_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_pre_dc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// CHECK-NEXT:    ret float [[TMP1]]
+//
+float test_float_pre_dc()
+{
+    static _Atomic float n;
+    return --n;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_pre_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_pre_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// CHECK-NEXT:    ret float [[TMP1]]
+//
+float test_float_pre_inc()
+{
+    static _Atomic float n;
+    return ++n;
+}
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK32: {{.*}}
+// CHECK64: {{.*}}

diff  --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index ca3fb6730fb640..ab061395f0b770 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -1,351 +1,342 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefix=CHECK32 %s
 
+// CHECK-LABEL: define dso_local x86_fp80 @testinc(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP3]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testinc(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK32-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP3]]
+//
 long double testinc(_Atomic long double *addr) {
-  // CHECK-LABEL: @testinc
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[INC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: ret x86_fp80 [[INC_VALUE]]
-  // CHECK32-LABEL: @testinc
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[INC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: ret x86_fp80 [[INC_VALUE]]
 
   return ++*addr;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @testdec(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
+// CHECK-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP2]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testdec(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP2]]
+//
 long double testdec(_Atomic long double *addr) {
-  // CHECK-LABEL: @testdec
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[DEC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]]
-  // CHECK32-LABEL: @testdec
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[DEC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]]
 
   return (*addr)--;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @testcompassign(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    br label [[ATOMIC_OP:%.*]]
+// CHECK:       atomic_op:
+// CHECK-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[ATOMIC_OP]] ]
+// CHECK-NEXT:    [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// CHECK-NEXT:    store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT:    [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT:    br i1 [[TMP7]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK:       atomic_cont:
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD4:%.*]] = load atomic i128, ptr [[TMP9]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT:    [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP10]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testcompassign(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// CHECK32-NEXT:    br label [[ATOMIC_OP:%.*]]
+// CHECK32:       atomic_op:
+// CHECK32-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[ATOMIC_OP]] ]
+// CHECK32-NEXT:    [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
+// CHECK32-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// CHECK32-NEXT:    [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    br i1 [[CALL]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK32:       atomic_cont:
+// CHECK32-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP5]]
+//
 long double testcompassign(_Atomic long double *addr) {
   *addr -= 25;
-  // CHECK-LABEL: @testcompassign
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[SUB_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 8
-  // CHECK: [[INT_VAL:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VAL]], ptr [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, ptr [[LD_TEMP:%.+]], align 16
-  // CHECK: ret x86_fp80 [[RET_VAL]]
-  // CHECK32-LABEL: @testcompassign
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[INC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[GET_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, ptr [[GET_ADDR]], align 4
-  // CHECK32: ret x86_fp80 [[RET_VAL]]
   return *addr;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @testassign(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    store atomic i128 [[TMP1]], ptr [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP2]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP3]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testassign(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 4
+// CHECK32-NEXT:    call void @__atomic_store(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP2]]
+//
 long double testassign(_Atomic long double *addr) {
-  // CHECK-LABEL: @testassign
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[STORE_TEMP_PTR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 {{.+}}, ptr [[STORE_TEMP_PTR]], align 16
-  // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, ptr [[STORE_TEMP_PTR]], align 16
-  // CHECK: store atomic i128 [[STORE_TEMP_INT]], ptr [[ADDR]] seq_cst, align 16
-  // CHECK32-LABEL: @testassign
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[STORE_TEMP_PTR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 {{.+}}, ptr [[STORE_TEMP_PTR]], align 4
-  // CHECK32: call void @__atomic_store(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[STORE_TEMP_PTR]], i32 noundef 5)
   *addr = 115;
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 8
-  // CHECK: [[INT_VAL:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VAL]], ptr [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, ptr [[LD_TEMP:%.+]], align 16
-  // CHECK: ret x86_fp80 [[RET_VAL]]
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[LD_TEMP:%.+]], i32 noundef 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, ptr [[LD_TEMP]], align 4
-  // CHECK32: ret x86_fp80 [[RET_VAL]]
 
   return *addr;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @test_volatile_inc(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP3]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_inc(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK32-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP3]]
+//
 long double test_volatile_inc(volatile _Atomic long double *addr) {
-  // CHECK-LABEL: @test_volatile_inc
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[INC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg volatile ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: ret x86_fp80 [[INC_VALUE]]
-  // CHECK32-LABEL: @test_volatile_inc
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[INC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: ret x86_fp80 [[INC_VALUE]]
   return ++*addr;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @test_volatile_dec(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
+// CHECK-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP2]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_dec(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP2]]
+//
 long double test_volatile_dec(volatile _Atomic long double *addr) {
-  // CHECK-LABEL: @test_volatile_dec
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[DEC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg volatile ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]]
-  // CHECK32-LABEL: @test_volatile_dec
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[DEC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]]
   return (*addr)--;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @test_volatile_compassign(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    br label [[ATOMIC_OP:%.*]]
+// CHECK:       atomic_op:
+// CHECK-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[ATOMIC_OP]] ]
+// CHECK-NEXT:    [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg volatile ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// CHECK-NEXT:    store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT:    [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT:    br i1 [[TMP7]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK:       atomic_cont:
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD4:%.*]] = load atomic volatile i128, ptr [[TMP9]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT:    [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP10]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_compassign(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// CHECK32-NEXT:    br label [[ATOMIC_OP:%.*]]
+// CHECK32:       atomic_op:
+// CHECK32-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[ATOMIC_OP]] ]
+// CHECK32-NEXT:    [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
+// CHECK32-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// CHECK32-NEXT:    [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    br i1 [[CALL]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK32:       atomic_cont:
+// CHECK32-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP5]]
+//
 long double test_volatile_compassign(volatile _Atomic long double *addr) {
   *addr -= 25;
-  // CHECK-LABEL: @test_volatile_compassign
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[SUB_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg volatile ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 8
-  // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VAL]], ptr [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, ptr [[LD_TEMP:%.+]], align 16
-  // CHECK32-LABEL: @test_volatile_compassign
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[INC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[GET_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, ptr [[GET_ADDR]], align 4
-  // CHECK32: ret x86_fp80 [[RET_VAL]]
   return *addr;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @test_volatile_assign(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    store atomic volatile i128 [[TMP1]], ptr [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP2]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP3]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @test_volatile_assign(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 0xK4005E600000000000000, ptr [[ATOMIC_TEMP]], align 4
+// CHECK32-NEXT:    call void @__atomic_store(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP2]]
+//
 long double test_volatile_assign(volatile _Atomic long double *addr) {
-  // CHECK-LABEL: @test_volatile_assign
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[STORE_TEMP_PTR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 {{.+}}, ptr [[STORE_TEMP_PTR]], align 16
-  // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, ptr [[STORE_TEMP_PTR]], align 16
-  // CHECK: store atomic volatile i128 [[STORE_TEMP_INT]], ptr [[ADDR]] seq_cst, align 16
-  // CHECK32-LABEL: @test_volatile_assign
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[STORE_TEMP_PTR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 {{.+}}, ptr [[STORE_TEMP_PTR]], align 4
-  // CHECK32: call void @__atomic_store(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[STORE_TEMP_PTR]], i32 noundef 5)
   *addr = 115;
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 8
-  // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VAL]], ptr [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, ptr [[LD_TEMP:%.+]], align 16
-  // CHECK: ret x86_fp80 [[RET_VAL]]
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr %{{.+}}, align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[LD_TEMP:%.+]], i32 noundef 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, ptr [[LD_TEMP]], align 4
-  // CHECK32: ret x86_fp80 [[RET_VAL]]
 
   return *addr;
 }


        


More information about the cfe-commits mailing list