[clang] 8043d5a - NFC: update clang tests to check ordering and alignment for atomicrmw/cmpxchg.

James Y Knight via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 11 14:35:38 PST 2021


Author: James Y Knight
Date: 2021-02-11T17:35:09-05:00
New Revision: 8043d5a9643b5731454fce91fac0018bfddc96d6

URL: https://github.com/llvm/llvm-project/commit/8043d5a9643b5731454fce91fac0018bfddc96d6
DIFF: https://github.com/llvm/llvm-project/commit/8043d5a9643b5731454fce91fac0018bfddc96d6.diff

LOG: NFC: update clang tests to check ordering and alignment for atomicrmw/cmpxchg.

The ability to specify alignment on atomicrmw and cmpxchg was recently
added, and it's an important property which we should ensure Clang
sets as expected, especially before making further changes to Clang's
code in this area. But because the alignment appears at the end of the
IR line, the existing tests all ignore it.

Therefore, update all the tests to also verify the expected alignment
for atomicrmw and cmpxchg. While I was in there, I also updated uses
of 'load atomic' and 'store atomic', adding the memory ordering where
it was missing.
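
To make the pattern concrete: FileCheck matches a CHECK directive
against a substring of the output line, so a directive that stops
before the trailing operands never constrains them. A minimal sketch
(hypothetical function, not taken from any one test, assuming a 64-bit
x86 target where int is 4-byte aligned):

    int fetch_add(int *p) {
      return __sync_fetch_and_add(p, 1);
    }
    // emitted IR: %old = atomicrmw add i32* %p, i32 1 seq_cst, align 4
    // old check:  // CHECK: atomicrmw add i32
    //             (passes whatever ordering or alignment follows)
    // new check:  // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4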

Added: 
    

Modified: 
    clang/test/CodeGen/2008-03-05-syncPtr.c
    clang/test/CodeGen/2010-01-13-MemBarrier.c
    clang/test/CodeGen/Atomics.c
    clang/test/CodeGen/RISCV/riscv-atomics.c
    clang/test/CodeGen/X86/x86-atomic-long_double.c
    clang/test/CodeGen/X86/x86_64-atomic-128.c
    clang/test/CodeGen/arm-atomics-m.c
    clang/test/CodeGen/arm-atomics.c
    clang/test/CodeGen/arm64-microsoft-intrinsics.c
    clang/test/CodeGen/atomic-ops.c
    clang/test/CodeGen/atomic.c
    clang/test/CodeGen/atomic_ops.c
    clang/test/CodeGen/atomics-inlining.c
    clang/test/CodeGen/big-atomic-ops.c
    clang/test/CodeGen/bittest-intrin.c
    clang/test/CodeGen/builtins-nvptx-ptx50.cu
    clang/test/CodeGen/builtins-nvptx.c
    clang/test/CodeGen/c11atomics-ios.c
    clang/test/CodeGen/c11atomics.c
    clang/test/CodeGen/code-coverage-tsan.c
    clang/test/CodeGen/linux-arm-atomic.c
    clang/test/CodeGen/ms-intrinsics-other.c
    clang/test/CodeGen/ms-intrinsics.c
    clang/test/CodeGen/ms-volatile.c
    clang/test/CodeGen/pr45476.cpp
    clang/test/CodeGenCXX/atomic-align.cpp
    clang/test/CodeGenCXX/atomic-inline.cpp
    clang/test/CodeGenCXX/atomic.cpp
    clang/test/CodeGenCXX/atomicinit.cpp
    clang/test/CodeGenCXX/cxx1z-decomposition.cpp
    clang/test/CodeGenCXX/cxx1z-inline-variables.cpp
    clang/test/CodeGenCXX/static-init-pnacl.cpp
    clang/test/CodeGenCXX/static-init.cpp
    clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
    clang/test/CodeGenObjC/property-atomic-bool.m
    clang/test/CodeGenOpenCL/atomic-ops.cl
    clang/test/OpenMP/atomic_capture_codegen.cpp
    clang/test/OpenMP/atomic_codegen.cpp
    clang/test/OpenMP/atomic_read_codegen.c
    clang/test/OpenMP/atomic_update_codegen.cpp
    clang/test/OpenMP/atomic_write_codegen.c
    clang/test/OpenMP/for_reduction_codegen.cpp
    clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp
    clang/test/OpenMP/parallel_master_codegen.cpp
    clang/test/OpenMP/parallel_reduction_codegen.cpp
    clang/test/OpenMP/requires_acq_rel_codegen.cpp
    clang/test/OpenMP/requires_relaxed_codegen.cpp
    clang/test/OpenMP/requires_seq_cst_codegen.cpp
    clang/test/OpenMP/sections_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
    clang/test/OpenMP/taskloop_with_atomic_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp

Removed: 
    


################################################################################
diff --git a/clang/test/CodeGen/2008-03-05-syncPtr.c b/clang/test/CodeGen/2008-03-05-syncPtr.c
index 7c83d9fe355f..1e998e1db6ba 100644
--- a/clang/test/CodeGen/2008-03-05-syncPtr.c
+++ b/clang/test/CodeGen/2008-03-05-syncPtr.c
@@ -4,37 +4,37 @@ int* foo(int** a, int* b, int* c) {
 return __sync_val_compare_and_swap (a, b, c);
 }
 // CHECK-LABEL: define{{.*}} i32* @foo
-// CHECK: cmpxchg 
+// CHECK: cmpxchg {{.*}}, align 8
 
 int foo2(int** a, int* b, int* c) {
 return __sync_bool_compare_and_swap (a, b, c);
 }
 // CHECK-LABEL: define{{.*}} i32 @foo2
-// CHECK: cmpxchg
+// CHECK: cmpxchg {{.*}}, align 8
 
 int* foo3(int** a, int b) {
   return __sync_fetch_and_add (a, b);
 }
 // CHECK-LABEL: define{{.*}} i32* @foo3
-// CHECK: atomicrmw add
+// CHECK: atomicrmw add {{.*}}, align 8
 
 
 int* foo4(int** a, int b) {
   return __sync_fetch_and_sub (a, b);
 }
 // CHECK-LABEL: define{{.*}} i32* @foo4
-// CHECK: atomicrmw sub
+// CHECK: atomicrmw sub {{.*}}, align 8
 
 
 int* foo5(int** a, int* b) {
   return __sync_lock_test_and_set (a, b);
 }
 // CHECK-LABEL: define{{.*}} i32* @foo5
-// CHECK: atomicrmw xchg
+// CHECK: atomicrmw xchg {{.*}}, align 8
 
 
 int* foo6(int** a, int*** b) {
   return __sync_lock_test_and_set (a, b);
 }
 // CHECK-LABEL: define{{.*}} i32* @foo6
-// CHECK: atomicrmw xchg
+// CHECK: atomicrmw xchg {{.*}}, align 8
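
Why "align 8" throughout this file: every builtin here operates on a
pointer-sized object (the int* pointee of the int** argument), and
pointers are 8 bytes on the 64-bit triple this test runs under. A
sketch of the correspondence, using the test's own foo3 (the RMW is
performed on the pointer object *a, as an 8-byte integer access):

    int *foo3(int **a, int b) { return __sync_fetch_and_add(a, b); }
    // CHECK: atomicrmw add {{.*}}, align 8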

diff --git a/clang/test/CodeGen/2010-01-13-MemBarrier.c b/clang/test/CodeGen/2010-01-13-MemBarrier.c
index 74d0cb054e73..25f2e4f7bb51 100644
--- a/clang/test/CodeGen/2010-01-13-MemBarrier.c
+++ b/clang/test/CodeGen/2010-01-13-MemBarrier.c
@@ -5,6 +5,6 @@ typedef unsigned __INT32_TYPE__ uint32_t;
 
 unsigned t(uint32_t *ptr, uint32_t val) {
   // CHECK:      @t
-  // CHECK: atomicrmw xchg i32* {{.*}} seq_cst
+  // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
   return __sync_lock_test_and_set(ptr, val);
 }

diff --git a/clang/test/CodeGen/Atomics.c b/clang/test/CodeGen/Atomics.c
index 4f76bccf7082..64fac0e704be 100644
--- a/clang/test/CodeGen/Atomics.c
+++ b/clang/test/CodeGen/Atomics.c
@@ -15,61 +15,61 @@ unsigned  __int128 u128;
 
 void test_op_ignore (void) // CHECK-LABEL: define{{.*}} void @test_op_ignore
 {
-  (void) __sync_fetch_and_add (&sc, 1); // CHECK: atomicrmw add i8
-  (void) __sync_fetch_and_add (&uc, 1); // CHECK: atomicrmw add i8
-  (void) __sync_fetch_and_add (&ss, 1); // CHECK: atomicrmw add i16
-  (void) __sync_fetch_and_add (&us, 1); // CHECK: atomicrmw add i16
-  (void) __sync_fetch_and_add (&si, 1); // CHECK: atomicrmw add i32
-  (void) __sync_fetch_and_add (&ui, 1); // CHECK: atomicrmw add i32
-  (void) __sync_fetch_and_add (&sll, 1); // CHECK: atomicrmw add i64
-  (void) __sync_fetch_and_add (&ull, 1); // CHECK: atomicrmw add i64
-
-  (void) __sync_fetch_and_sub (&sc, 1); // CHECK: atomicrmw sub i8
-  (void) __sync_fetch_and_sub (&uc, 1); // CHECK: atomicrmw sub i8
-  (void) __sync_fetch_and_sub (&ss, 1); // CHECK: atomicrmw sub i16
-  (void) __sync_fetch_and_sub (&us, 1); // CHECK: atomicrmw sub i16
-  (void) __sync_fetch_and_sub (&si, 1); // CHECK: atomicrmw sub i32
-  (void) __sync_fetch_and_sub (&ui, 1); // CHECK: atomicrmw sub i32
-  (void) __sync_fetch_and_sub (&sll, 1); // CHECK: atomicrmw sub i64
-  (void) __sync_fetch_and_sub (&ull, 1); // CHECK: atomicrmw sub i64
-
-  (void) __sync_fetch_and_or (&sc, 1); // CHECK: atomicrmw or i8
-  (void) __sync_fetch_and_or (&uc, 1); // CHECK: atomicrmw or i8
-  (void) __sync_fetch_and_or (&ss, 1); // CHECK: atomicrmw or i16
-  (void) __sync_fetch_and_or (&us, 1); // CHECK: atomicrmw or i16
-  (void) __sync_fetch_and_or (&si, 1); // CHECK: atomicrmw or i32
-  (void) __sync_fetch_and_or (&ui, 1); // CHECK: atomicrmw or i32
-  (void) __sync_fetch_and_or (&sll, 1); // CHECK: atomicrmw or i64
-  (void) __sync_fetch_and_or (&ull, 1); // CHECK: atomicrmw or i64
-
-  (void) __sync_fetch_and_xor (&sc, 1); // CHECK: atomicrmw xor i8
-  (void) __sync_fetch_and_xor (&uc, 1); // CHECK: atomicrmw xor i8
-  (void) __sync_fetch_and_xor (&ss, 1); // CHECK: atomicrmw xor i16
-  (void) __sync_fetch_and_xor (&us, 1); // CHECK: atomicrmw xor i16
-  (void) __sync_fetch_and_xor (&si, 1); // CHECK: atomicrmw xor i32
-  (void) __sync_fetch_and_xor (&ui, 1); // CHECK: atomicrmw xor i32
-  (void) __sync_fetch_and_xor (&sll, 1); // CHECK: atomicrmw xor i64
-  (void) __sync_fetch_and_xor (&ull, 1); // CHECK: atomicrmw xor i64
-  (void) __sync_fetch_and_xor (&u128, 1); // CHECK: atomicrmw xor i128
-  (void) __sync_fetch_and_xor (&s128, 1); // CHECK: atomicrmw xor i128
-
-  (void) __sync_fetch_and_nand (&sc, 1); // CHECK: atomicrmw nand i8
-  (void) __sync_fetch_and_nand (&uc, 1); // CHECK: atomicrmw nand i8
-  (void) __sync_fetch_and_nand (&ss, 1); // CHECK: atomicrmw nand i16
-  (void) __sync_fetch_and_nand (&us, 1); // CHECK: atomicrmw nand i16
-  (void) __sync_fetch_and_nand (&si, 1); // CHECK: atomicrmw nand i32
-  (void) __sync_fetch_and_nand (&ui, 1); // CHECK: atomicrmw nand i32
-  (void) __sync_fetch_and_nand (&sll, 1); // CHECK: atomicrmw nand i64
-  (void) __sync_fetch_and_nand (&ull, 1); // CHECK: atomicrmw nand i64
-
-  (void) __sync_fetch_and_and (&sc, 1); // CHECK: atomicrmw and i8
-  (void) __sync_fetch_and_and (&uc, 1); // CHECK: atomicrmw and i8
-  (void) __sync_fetch_and_and (&ss, 1); // CHECK: atomicrmw and i16
-  (void) __sync_fetch_and_and (&us, 1); // CHECK: atomicrmw and i16
-  (void) __sync_fetch_and_and (&si, 1); // CHECK: atomicrmw and i32
-  (void) __sync_fetch_and_and (&ui, 1); // CHECK: atomicrmw and i32
-  (void) __sync_fetch_and_and (&sll, 1); // CHECK: atomicrmw and i64
-  (void) __sync_fetch_and_and (&ull, 1); // CHECK: atomicrmw and i64
+  (void) __sync_fetch_and_add (&sc, 1); // CHECK: atomicrmw add i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_add (&uc, 1); // CHECK: atomicrmw add i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_add (&ss, 1); // CHECK: atomicrmw add i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_add (&us, 1); // CHECK: atomicrmw add i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_add (&si, 1); // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_add (&ui, 1); // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_add (&sll, 1); // CHECK: atomicrmw add i64* {{.*}} seq_cst, align 8
+  (void) __sync_fetch_and_add (&ull, 1); // CHECK: atomicrmw add i64* {{.*}} seq_cst, align 8
+
+  (void) __sync_fetch_and_sub (&sc, 1); // CHECK: atomicrmw sub i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_sub (&uc, 1); // CHECK: atomicrmw sub i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_sub (&ss, 1); // CHECK: atomicrmw sub i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_sub (&us, 1); // CHECK: atomicrmw sub i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_sub (&si, 1); // CHECK: atomicrmw sub i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_sub (&ui, 1); // CHECK: atomicrmw sub i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_sub (&sll, 1); // CHECK: atomicrmw sub i64* {{.*}} seq_cst, align 8
+  (void) __sync_fetch_and_sub (&ull, 1); // CHECK: atomicrmw sub i64* {{.*}} seq_cst, align 8
+
+  (void) __sync_fetch_and_or (&sc, 1); // CHECK: atomicrmw or i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_or (&uc, 1); // CHECK: atomicrmw or i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_or (&ss, 1); // CHECK: atomicrmw or i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_or (&us, 1); // CHECK: atomicrmw or i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_or (&si, 1); // CHECK: atomicrmw or i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_or (&ui, 1); // CHECK: atomicrmw or i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_or (&sll, 1); // CHECK: atomicrmw or i64* {{.*}} seq_cst, align 8
+  (void) __sync_fetch_and_or (&ull, 1); // CHECK: atomicrmw or i64* {{.*}} seq_cst, align 8
+
+  (void) __sync_fetch_and_xor (&sc, 1); // CHECK: atomicrmw xor i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_xor (&uc, 1); // CHECK: atomicrmw xor i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_xor (&ss, 1); // CHECK: atomicrmw xor i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_xor (&us, 1); // CHECK: atomicrmw xor i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_xor (&si, 1); // CHECK: atomicrmw xor i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_xor (&ui, 1); // CHECK: atomicrmw xor i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_xor (&sll, 1); // CHECK: atomicrmw xor i64* {{.*}} seq_cst, align 8
+  (void) __sync_fetch_and_xor (&ull, 1); // CHECK: atomicrmw xor i64* {{.*}} seq_cst, align 8
+  (void) __sync_fetch_and_xor (&u128, 1); // CHECK: atomicrmw xor i128* {{.*}} seq_cst, align 16
+  (void) __sync_fetch_and_xor (&s128, 1); // CHECK: atomicrmw xor i128* {{.*}} seq_cst, align 16
+
+  (void) __sync_fetch_and_nand (&sc, 1); // CHECK: atomicrmw nand i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_nand (&uc, 1); // CHECK: atomicrmw nand i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_nand (&ss, 1); // CHECK: atomicrmw nand i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_nand (&us, 1); // CHECK: atomicrmw nand i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_nand (&si, 1); // CHECK: atomicrmw nand i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_nand (&ui, 1); // CHECK: atomicrmw nand i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_nand (&sll, 1); // CHECK: atomicrmw nand i64* {{.*}} seq_cst, align 8
+  (void) __sync_fetch_and_nand (&ull, 1); // CHECK: atomicrmw nand i64* {{.*}} seq_cst, align 8
+
+  (void) __sync_fetch_and_and (&sc, 1); // CHECK: atomicrmw and i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_and (&uc, 1); // CHECK: atomicrmw and i8* {{.*}} seq_cst, align 1
+  (void) __sync_fetch_and_and (&ss, 1); // CHECK: atomicrmw and i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_and (&us, 1); // CHECK: atomicrmw and i16* {{.*}} seq_cst, align 2
+  (void) __sync_fetch_and_and (&si, 1); // CHECK: atomicrmw and i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_and (&ui, 1); // CHECK: atomicrmw and i32* {{.*}} seq_cst, align 4
+  (void) __sync_fetch_and_and (&sll, 1); // CHECK: atomicrmw and i64* {{.*}} seq_cst, align 8
+  (void) __sync_fetch_and_and (&ull, 1); // CHECK: atomicrmw and i64* {{.*}} seq_cst, align 8
 
 }
 
@@ -224,81 +224,81 @@ void test_op_and_fetch (void)
 void test_compare_and_swap (void)
 {
   sc = __sync_val_compare_and_swap (&sc, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1
   // CHECK: extractvalue { i8, i1 } [[PAIR]], 0
 
   uc = __sync_val_compare_and_swap (&uc, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1
   // CHECK: extractvalue { i8, i1 } [[PAIR]], 0
 
   ss = __sync_val_compare_and_swap (&ss, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2
   // CHECK: extractvalue { i16, i1 } [[PAIR]], 0
 
   us = __sync_val_compare_and_swap (&us, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2
   // CHECK: extractvalue { i16, i1 } [[PAIR]], 0
 
   si = __sync_val_compare_and_swap (&si, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4
   // CHECK: extractvalue { i32, i1 } [[PAIR]], 0
 
   ui = __sync_val_compare_and_swap (&ui, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4
   // CHECK: extractvalue { i32, i1 } [[PAIR]], 0
 
   sll = __sync_val_compare_and_swap (&sll, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8
   // CHECK: extractvalue { i64, i1 } [[PAIR]], 0
 
   ull = __sync_val_compare_and_swap (&ull, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8
   // CHECK: extractvalue { i64, i1 } [[PAIR]], 0
 
 
   ui = __sync_bool_compare_and_swap (&sc, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1
   // CHECK: extractvalue { i8, i1 } [[PAIR]], 1
 
   ui = __sync_bool_compare_and_swap (&uc, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i8* {{.*}} seq_cst, align 1
   // CHECK: extractvalue { i8, i1 } [[PAIR]], 1
 
   ui = __sync_bool_compare_and_swap (&ss, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2
   // CHECK: extractvalue { i16, i1 } [[PAIR]], 1
 
   ui = __sync_bool_compare_and_swap (&us, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i16* {{.*}} seq_cst, align 2
   // CHECK: extractvalue { i16, i1 } [[PAIR]], 1
 
   ui = __sync_bool_compare_and_swap (&si, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4
   // CHECK: extractvalue { i32, i1 } [[PAIR]], 1
 
   ui = __sync_bool_compare_and_swap (&ui, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i32* {{.*}} seq_cst, align 4
   // CHECK: extractvalue { i32, i1 } [[PAIR]], 1
 
   ui = __sync_bool_compare_and_swap (&sll, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8
   // CHECK: extractvalue { i64, i1 } [[PAIR]], 1
 
   ui = __sync_bool_compare_and_swap (&ull, uc, sc);
-  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64
+  // CHECK: [[PAIR:%[a-z0-9._]+]] = cmpxchg i64* {{.*}} seq_cst, align 8
   // CHECK: extractvalue { i64, i1 } [[PAIR]], 1
 }
 
 void test_lock (void)
 {
-  sc = __sync_lock_test_and_set (&sc, 1); // CHECK: atomicrmw xchg i8
-  uc = __sync_lock_test_and_set (&uc, 1); // CHECK: atomicrmw xchg i8
-  ss = __sync_lock_test_and_set (&ss, 1); // CHECK: atomicrmw xchg i16
-  us = __sync_lock_test_and_set (&us, 1); // CHECK: atomicrmw xchg i16
-  si = __sync_lock_test_and_set (&si, 1); // CHECK: atomicrmw xchg i32
-  ui = __sync_lock_test_and_set (&ui, 1); // CHECK: atomicrmw xchg i32
-  sll = __sync_lock_test_and_set (&sll, 1); // CHECK: atomicrmw xchg i64
-  ull = __sync_lock_test_and_set (&ull, 1); // CHECK: atomicrmw xchg i64
+  sc = __sync_lock_test_and_set (&sc, 1); // CHECK: atomicrmw xchg i8* {{.*}} seq_cst, align 1
+  uc = __sync_lock_test_and_set (&uc, 1); // CHECK: atomicrmw xchg i8* {{.*}} seq_cst, align 1
+  ss = __sync_lock_test_and_set (&ss, 1); // CHECK: atomicrmw xchg i16* {{.*}} seq_cst, align 2
+  us = __sync_lock_test_and_set (&us, 1); // CHECK: atomicrmw xchg i16* {{.*}} seq_cst, align 2
+  si = __sync_lock_test_and_set (&si, 1); // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
+  ui = __sync_lock_test_and_set (&ui, 1); // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
+  sll = __sync_lock_test_and_set (&sll, 1); // CHECK: atomicrmw xchg i64* {{.*}} seq_cst, align 8
+  ull = __sync_lock_test_and_set (&ull, 1); // CHECK: atomicrmw xchg i64* {{.*}} seq_cst, align 8
 
   __sync_synchronize (); // CHECK: fence seq_cst
 
@@ -313,8 +313,8 @@ void test_lock (void)
 }
 
 void test_atomic(void) {
-  ui = __atomic_fetch_min(&ui, 5, __ATOMIC_RELAXED); // CHECK: atomicrmw umin {{.*}} monotonic
-  si = __atomic_fetch_min(&si, 5, __ATOMIC_SEQ_CST); // CHECK: atomicrmw min {{.*}} seq_cst
-  ui = __atomic_fetch_max(&ui, 5, __ATOMIC_ACQUIRE); // CHECK: atomicrmw umax {{.*}} acquire
-  si = __atomic_fetch_max(&si, 5, __ATOMIC_RELEASE); // CHECK: atomicrmw max {{.*}} release
+  ui = __atomic_fetch_min(&ui, 5, __ATOMIC_RELAXED); // CHECK: atomicrmw umin {{.*}} monotonic, align 4
+  si = __atomic_fetch_min(&si, 5, __ATOMIC_SEQ_CST); // CHECK: atomicrmw min {{.*}} seq_cst, align 4
+  ui = __atomic_fetch_max(&ui, 5, __ATOMIC_ACQUIRE); // CHECK: atomicrmw umax {{.*}} acquire, align 4
+  si = __atomic_fetch_max(&si, 5, __ATOMIC_RELEASE); // CHECK: atomicrmw max {{.*}} release, align 4
 }
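
A key for reading the ordering keywords in these checks: the
__ATOMIC_*/memory_order_* constants map one-to-one onto LLVM IR
orderings, with one renaming (relaxed is spelled "monotonic" in IR)
and one strengthening (consume is lowered as acquire):

    __ATOMIC_RELAXED -> monotonic
    __ATOMIC_CONSUME -> acquire
    __ATOMIC_ACQUIRE -> acquire
    __ATOMIC_RELEASE -> release
    __ATOMIC_ACQ_REL -> acq_rel
    __ATOMIC_SEQ_CST -> seq_cst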

diff --git a/clang/test/CodeGen/RISCV/riscv-atomics.c b/clang/test/CodeGen/RISCV/riscv-atomics.c
index 9966543be1b6..b55a228815e9 100644
--- a/clang/test/CodeGen/RISCV/riscv-atomics.c
+++ b/clang/test/CodeGen/RISCV/riscv-atomics.c
@@ -19,13 +19,13 @@ void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
   // RV32I:  call zeroext i8 @__atomic_fetch_add_1
   // RV32IA: load atomic i8, i8* %a seq_cst, align 1
   // RV32IA: store atomic i8 %b, i8* %a seq_cst, align 1
-  // RV32IA: atomicrmw add i8* %a, i8 %b seq_cst
+  // RV32IA: atomicrmw add i8* %a, i8 %b seq_cst, align 1
   // RV64I:  call zeroext i8 @__atomic_load_1
   // RV64I:  call void @__atomic_store_1
   // RV64I:  call zeroext i8 @__atomic_fetch_add_1
   // RV64IA: load atomic i8, i8* %a seq_cst, align 1
   // RV64IA: store atomic i8 %b, i8* %a seq_cst, align 1
-  // RV64IA: atomicrmw add i8* %a, i8 %b seq_cst
+  // RV64IA: atomicrmw add i8* %a, i8 %b seq_cst, align 1
   __c11_atomic_load(a, memory_order_seq_cst);
   __c11_atomic_store(a, b, memory_order_seq_cst);
   __c11_atomic_fetch_add(a, b, memory_order_seq_cst);
@@ -37,13 +37,13 @@ void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
   // RV32I:  call i32 @__atomic_fetch_add_4
   // RV32IA: load atomic i32, i32* %a seq_cst, align 4
   // RV32IA: store atomic i32 %b, i32* %a seq_cst, align 4
-  // RV32IA: atomicrmw add i32* %a, i32 %b seq_cst
+  // RV32IA: atomicrmw add i32* %a, i32 %b seq_cst, align 4
   // RV64I:  call signext i32 @__atomic_load_4
   // RV64I:  call void @__atomic_store_4
   // RV64I:  call signext i32 @__atomic_fetch_add_4
   // RV64IA: load atomic i32, i32* %a seq_cst, align 4
   // RV64IA: store atomic i32 %b, i32* %a seq_cst, align 4
-  // RV64IA: atomicrmw add i32* %a, i32 %b seq_cst
+  // RV64IA: atomicrmw add i32* %a, i32 %b seq_cst, align 4
   __c11_atomic_load(a, memory_order_seq_cst);
   __c11_atomic_store(a, b, memory_order_seq_cst);
   __c11_atomic_fetch_add(a, b, memory_order_seq_cst);
@@ -61,7 +61,7 @@ void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
   // RV64I:  call i64 @__atomic_fetch_add_8
   // RV64IA: load atomic i64, i64* %a seq_cst, align 8
   // RV64IA: store atomic i64 %b, i64* %a seq_cst, align 8
-  // RV64IA: atomicrmw add i64* %a, i64 %b seq_cst
+  // RV64IA: atomicrmw add i64* %a, i64 %b seq_cst, align 8
   __c11_atomic_load(a, memory_order_seq_cst);
   __c11_atomic_store(a, b, memory_order_seq_cst);
   __c11_atomic_fetch_add(a, b, memory_order_seq_cst);
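
The asymmetry in this file comes from its two RUN configurations:
without the 'a' (atomics) extension, the builtins lower to __atomic_*
libcalls, which carry no IR-level ordering or alignment to check, so
only the RV32IA/RV64IA lines gained ", align N". Roughly (flags
abbreviated; the exact RUN lines live in the test itself):

    // RUN: %clang_cc1 -triple riscv32 ... | FileCheck %s -check-prefix=RV32I
    // RUN: %clang_cc1 -triple riscv32 -target-feature +a ... | FileCheck %s -check-prefix=RV32IA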

diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index 130ff45c0346..7ad7de8c917e 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -25,7 +25,7 @@ long double testinc(_Atomic long double *addr) {
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
+  // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
@@ -87,7 +87,7 @@ long double testdec(_Atomic long double *addr) {
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
+  // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
@@ -150,7 +150,7 @@ long double testcompassign(_Atomic long double *addr) {
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
+  // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
@@ -260,7 +260,7 @@ long double test_volatile_inc(volatile _Atomic long double *addr) {
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
+  // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
@@ -321,7 +321,7 @@ long double test_volatile_dec(volatile _Atomic long double *addr) {
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
+  // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
@@ -383,7 +383,7 @@ long double test_volatile_compassign(volatile _Atomic long double *addr) {
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
   // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
+  // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
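
All of the cmpxchg checks in this file sit inside a compare-exchange
loop: x86 has no 80-bit atomic operation, so Clang round-trips the
x86_fp80 through its 16-byte storage slot as an i128 and retries until
the exchange succeeds. The loop shape, sketched loosely rather than as
literal IR:

    atomic_op:                                   ; loop head
      ; ...compute %new from the current %old...
      %res = cmpxchg i128* %addr, i128 %old, i128 %new seq_cst seq_cst, align 16
      %old.next = extractvalue { i128, i1 } %res, 0
      %ok = extractvalue { i128, i1 } %res, 1
      br i1 %ok, label %done, label %atomic_op   ; retry on failure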

diff --git a/clang/test/CodeGen/X86/x86_64-atomic-128.c b/clang/test/CodeGen/X86/x86_64-atomic-128.c
index 2069e455828d..e0e7515baae8 100644
--- a/clang/test/CodeGen/X86/x86_64-atomic-128.c
+++ b/clang/test/CodeGen/X86/x86_64-atomic-128.c
@@ -6,24 +6,24 @@
 
 __int128 test_sync_call(__int128 *addr, __int128 val) {
   // CHECK-LABEL: @test_sync_call
-  // CHECK: atomicrmw add i128
+  // CHECK: atomicrmw add i128* {{.*}} seq_cst, align 16
   return __sync_fetch_and_add(addr, val);
 }
 
 __int128 test_c11_call(_Atomic __int128 *addr, __int128 val) {
   // CHECK-LABEL: @test_c11_call
-  // CHECK: atomicrmw sub
+  // CHECK: atomicrmw sub i128* {{.*}} monotonic, align 16
   return __c11_atomic_fetch_sub(addr, val, 0);
 }
 
 __int128 test_atomic_call(__int128 *addr, __int128 val) {
   // CHECK-LABEL: @test_atomic_call
-  // CHECK: atomicrmw or
+  // CHECK: atomicrmw or i128* {{.*}} monotonic, align 16
   return __atomic_fetch_or(addr, val, 0);
 }
 
 __int128 test_expression(_Atomic __int128 *addr) {
   // CHECK-LABEL: @test_expression
-  // CHECK: atomicrmw and
+  // CHECK: atomicrmw and i128* {{.*}} seq_cst, align 16
   *addr &= 1;
 }
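
Two details behind these checks: a literal order argument of 0 means
memory_order_relaxed, hence "monotonic" rather than "seq_cst", and
__int128 is naturally 16-byte aligned on x86-64, hence "align 16". The
order constants, as Clang's <stdatomic.h> defines them:

    memory_order_relaxed = 0, memory_order_consume = 1,
    memory_order_acquire = 2, memory_order_release = 3,
    memory_order_acq_rel = 4, memory_order_seq_cst = 5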

diff --git a/clang/test/CodeGen/arm-atomics-m.c b/clang/test/CodeGen/arm-atomics-m.c
index cd9e71e5d944..e314bbfc764e 100644
--- a/clang/test/CodeGen/arm-atomics-m.c
+++ b/clang/test/CodeGen/arm-atomics-m.c
@@ -11,14 +11,14 @@ typedef enum memory_order {
 void test_presence(void)
 {
   // CHECK-LABEL: @test_presence
-  // CHECK: atomicrmw add i32* {{.*}} seq_cst
+  // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
   __atomic_fetch_add(&i, 1, memory_order_seq_cst);
-  // CHECK: atomicrmw sub i32* {{.*}} seq_cst
+  // CHECK: atomicrmw sub i32* {{.*}} seq_cst, align 4
   __atomic_fetch_sub(&i, 1, memory_order_seq_cst);
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   int r;
   __atomic_load(&i, &r, memory_order_seq_cst);
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   r = 0;
   __atomic_store(&i, &r, memory_order_seq_cst);
 

diff --git a/clang/test/CodeGen/arm-atomics.c b/clang/test/CodeGen/arm-atomics.c
index aa5a6ecd0caa..653485374bb4 100644
--- a/clang/test/CodeGen/arm-atomics.c
+++ b/clang/test/CodeGen/arm-atomics.c
@@ -13,25 +13,25 @@ typedef enum memory_order {
 void test_presence(void)
 {
   // CHECK-LABEL: @test_presence
-  // CHECK: atomicrmw add i32* {{.*}} seq_cst
+  // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
   __atomic_fetch_add(&i, 1, memory_order_seq_cst);
-  // CHECK: atomicrmw sub i32* {{.*}} seq_cst
+  // CHECK: atomicrmw sub i32* {{.*}} seq_cst, align 4
   __atomic_fetch_sub(&i, 1, memory_order_seq_cst);
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   int r;
   __atomic_load(&i, &r, memory_order_seq_cst);
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   r = 0;
   __atomic_store(&i, &r, memory_order_seq_cst);
 
-  // CHECK: atomicrmw add i64* {{.*}} seq_cst
+  // CHECK: atomicrmw add i64* {{.*}} seq_cst, align 8
   __atomic_fetch_add(&l, 1, memory_order_seq_cst);
-  // CHECK: atomicrmw sub i64* {{.*}} seq_cst
+  // CHECK: atomicrmw sub i64* {{.*}} seq_cst, align 8
   __atomic_fetch_sub(&l, 1, memory_order_seq_cst);
-  // CHECK: load atomic i64, i64* {{.*}} seq_cst
+  // CHECK: load atomic i64, i64* {{.*}} seq_cst, align 8
   long long rl;
   __atomic_load(&l, &rl, memory_order_seq_cst);
-  // CHECK: store atomic i64 {{.*}} seq_cst
+  // CHECK: store atomic i64 {{.*}} seq_cst, align 8
   rl = 0;
   __atomic_store(&l, &rl, memory_order_seq_cst);
 }

diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index 4a9642e173c1..ca8f270bd4f3 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -13,7 +13,7 @@ long test_InterlockedAdd_constant(long volatile *Addend) {
 }
 
 // CHECK-LABEL: define {{.*}} i32 @test_InterlockedAdd(i32* %Addend, i32 %Value) {{.*}} {
-// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add i32* %1, i32 %2 seq_cst
+// CHECK-MSVC: %[[OLDVAL:[0-9]+]] = atomicrmw add i32* %1, i32 %2 seq_cst, align 4
 // CHECK-MSVC: %[[NEWVAL:[0-9]+]] = add i32 %[[OLDVAL:[0-9]+]], %2
 // CHECK-MSVC: ret i32 %[[NEWVAL:[0-9]+]]
 // CHECK-LINUX: error: implicit declaration of function '_InterlockedAdd'
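
The OLDVAL/NEWVAL pair in this check bridges a semantic mismatch:
MSVC's _InterlockedAdd returns the new value, while LLVM's atomicrmw
returns the old one, so Clang emits a plain add after the RMW. A
sketch (on aarch64-windows, long is 32-bit):

    long test(long volatile *p, long v) {
      return _InterlockedAdd(p, v);
    }
    // %old = atomicrmw add i32* %p, i32 %v seq_cst, align 4
    // %new = add i32 %old, %v
    // ret i32 %new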

diff --git a/clang/test/CodeGen/atomic-ops.c b/clang/test/CodeGen/atomic-ops.c
index 2cf5d2beb3a8..4deb1322e0ff 100644
--- a/clang/test/CodeGen/atomic-ops.c
+++ b/clang/test/CodeGen/atomic-ops.c
@@ -14,13 +14,13 @@
 
 int fi1(_Atomic(int) *i) {
   // CHECK-LABEL: @fi1
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   return __c11_atomic_load(i, memory_order_seq_cst);
 }
 
 int fi1a(int *i) {
   // CHECK-LABEL: @fi1a
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   int v;
   __atomic_load(i, &v, memory_order_seq_cst);
   return v;
@@ -28,72 +28,72 @@ int fi1a(int *i) {
 
 int fi1b(int *i) {
   // CHECK-LABEL: @fi1b
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   return __atomic_load_n(i, memory_order_seq_cst);
 }
 
 int fi1c(atomic_int *i) {
   // CHECK-LABEL: @fi1c
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   return atomic_load(i);
 }
 
 void fi2(_Atomic(int) *i) {
   // CHECK-LABEL: @fi2
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   __c11_atomic_store(i, 1, memory_order_seq_cst);
 }
 
 void fi2a(int *i) {
   // CHECK-LABEL: @fi2a
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   int v = 1;
   __atomic_store(i, &v, memory_order_seq_cst);
 }
 
 void fi2b(int *i) {
   // CHECK-LABEL: @fi2b
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   __atomic_store_n(i, 1, memory_order_seq_cst);
 }
 
 void fi2c(atomic_int *i) {
   // CHECK-LABEL: @fi2c
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   atomic_store(i, 1);
 }
 
 int fi3(_Atomic(int) *i) {
   // CHECK-LABEL: @fi3
-  // CHECK: atomicrmw and
+  // CHECK: atomicrmw and {{.*}} seq_cst, align 4
   // CHECK-NOT: and
   return __c11_atomic_fetch_and(i, 1, memory_order_seq_cst);
 }
 
 int fi3a(int *i) {
   // CHECK-LABEL: @fi3a
-  // CHECK: atomicrmw xor
+  // CHECK: atomicrmw xor {{.*}} seq_cst, align 4
   // CHECK-NOT: xor
   return __atomic_fetch_xor(i, 1, memory_order_seq_cst);
 }
 
 int fi3b(int *i) {
   // CHECK-LABEL: @fi3b
-  // CHECK: atomicrmw add
+  // CHECK: atomicrmw add {{.*}} seq_cst, align 4
   // CHECK: add
   return __atomic_add_fetch(i, 1, memory_order_seq_cst);
 }
 
 int fi3c(int *i) {
   // CHECK-LABEL: @fi3c
-  // CHECK: atomicrmw nand
+  // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
   // CHECK-NOT: and
   return __atomic_fetch_nand(i, 1, memory_order_seq_cst);
 }
 
 int fi3d(int *i) {
   // CHECK-LABEL: @fi3d
-  // CHECK: atomicrmw nand
+  // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
   // CHECK: and
   // CHECK: xor
   return __atomic_nand_fetch(i, 1, memory_order_seq_cst);
@@ -101,7 +101,7 @@ int fi3d(int *i) {
 
 int fi3e(atomic_int *i) {
   // CHECK-LABEL: @fi3e
-  // CHECK: atomicrmw or
+  // CHECK: atomicrmw or {{.*}} seq_cst, align 4
   // CHECK-NOT: {{ or }}
   return atomic_fetch_or(i, 1);
 }
@@ -109,14 +109,14 @@ int fi3e(atomic_int *i) {
 int fi3f(int *i) {
   // CHECK-LABEL: @fi3f
   // CHECK-NOT: store volatile
-  // CHECK: atomicrmw or
+  // CHECK: atomicrmw or {{.*}} seq_cst, align 4
   // CHECK-NOT: {{ or }}
   return __atomic_fetch_or(i, (short)1, memory_order_seq_cst);
 }
 
 _Bool fi4(_Atomic(int) *i) {
   // CHECK-LABEL: @fi4(
-  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]]
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
   // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
   // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
   // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -127,7 +127,7 @@ _Bool fi4(_Atomic(int) *i) {
 
 _Bool fi4a(int *i) {
   // CHECK-LABEL: @fi4a
-  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]]
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
   // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
   // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
   // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -139,7 +139,7 @@ _Bool fi4a(int *i) {
 
 _Bool fi4b(int *i) {
   // CHECK-LABEL: @fi4b(
-  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg weak i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]]
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg weak i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] acquire acquire, align 4
   // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
   // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
   // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -150,7 +150,7 @@ _Bool fi4b(int *i) {
 
 _Bool fi4c(atomic_int *i) {
   // CHECK-LABEL: @fi4c
-  // CHECK: cmpxchg i32*
+  // CHECK: cmpxchg i32* {{.*}} seq_cst seq_cst, align 4
   int cmp = 0;
   return atomic_compare_exchange_strong(i, &cmp, 1);
 }
@@ -159,19 +159,19 @@ _Bool fi4c(atomic_int *i) {
 _Bool fi4d(_Atomic(int) *i, int _AS1 *ptr2) {
   // CHECK-LABEL: @fi4d(
   // CHECK: [[EXPECTED:%[.0-9A-Z_a-z]+]] = load i32, i32 addrspace(1)* %{{[0-9]+}}
-  // CHECK: cmpxchg i32* %{{[0-9]+}}, i32 [[EXPECTED]], i32 %{{[0-9]+}} acquire acquire
+  // CHECK: cmpxchg i32* %{{[0-9]+}}, i32 [[EXPECTED]], i32 %{{[0-9]+}} acquire acquire, align 4
   return __c11_atomic_compare_exchange_strong(i, ptr2, 1, memory_order_acquire, memory_order_acquire);
 }
 
 float ff1(_Atomic(float) *d) {
   // CHECK-LABEL: @ff1
-  // CHECK: load atomic i32, i32* {{.*}} monotonic
+  // CHECK: load atomic i32, i32* {{.*}} monotonic, align 4
   return __c11_atomic_load(d, memory_order_relaxed);
 }
 
 void ff2(_Atomic(float) *d) {
   // CHECK-LABEL: @ff2
-  // CHECK: store atomic i32 {{.*}} release
+  // CHECK: store atomic i32 {{.*}} release, align 4
   __c11_atomic_store(d, 1, memory_order_release);
 }
 
@@ -272,41 +272,41 @@ _Bool fd4(struct S *a, struct S *b, struct S *c) {
 
 int* fp1(_Atomic(int*) *p) {
   // CHECK-LABEL: @fp1
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   return __c11_atomic_load(p, memory_order_seq_cst);
 }
 
 int* fp2(_Atomic(int*) *p) {
   // CHECK-LABEL: @fp2
   // CHECK: store i32 4
-  // CHECK: atomicrmw add {{.*}} monotonic
+  // CHECK: atomicrmw add {{.*}} monotonic, align 4
   return __c11_atomic_fetch_add(p, 1, memory_order_relaxed);
 }
 
 int *fp2a(int **p) {
   // CHECK-LABEL: @fp2a
   // CHECK: store i32 4
-  // CHECK: atomicrmw sub {{.*}} monotonic
+  // CHECK: atomicrmw sub {{.*}} monotonic, align 4
   // Note, the GNU builtins do not multiply by sizeof(T)!
   return __atomic_fetch_sub(p, 4, memory_order_relaxed);
 }
 
 _Complex float fc(_Atomic(_Complex float) *c) {
   // CHECK-LABEL: @fc
-  // CHECK: atomicrmw xchg i64*
+  // CHECK: atomicrmw xchg i64* {{.*}} seq_cst, align 8
   return __c11_atomic_exchange(c, 2, memory_order_seq_cst);
 }
 
 typedef struct X { int x; } X;
 X fs(_Atomic(X) *c) {
   // CHECK-LABEL: @fs
-  // CHECK: atomicrmw xchg i32*
+  // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
   return __c11_atomic_exchange(c, (X){2}, memory_order_seq_cst);
 }
 
 X fsa(X *c, X *d) {
   // CHECK-LABEL: @fsa
-  // CHECK: atomicrmw xchg i32*
+  // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
   X ret;
   __atomic_exchange(c, d, &ret, memory_order_seq_cst);
   return ret;
@@ -314,20 +314,20 @@ X fsa(X *c, X *d) {
 
 _Bool fsb(_Bool *c) {
   // CHECK-LABEL: @fsb
-  // CHECK: atomicrmw xchg i8*
+  // CHECK: atomicrmw xchg i8* {{.*}} seq_cst, align 1
   return __atomic_exchange_n(c, 1, memory_order_seq_cst);
 }
 
 char flag1;
 volatile char flag2;
 void test_and_set() {
-  // CHECK: atomicrmw xchg i8* @flag1, i8 1 seq_cst
+  // CHECK: atomicrmw xchg i8* @flag1, i8 1 seq_cst, align 1
   __atomic_test_and_set(&flag1, memory_order_seq_cst);
-  // CHECK: atomicrmw volatile xchg i8* @flag2, i8 1 acquire
+  // CHECK: atomicrmw volatile xchg i8* @flag2, i8 1 acquire, align 1
   __atomic_test_and_set(&flag2, memory_order_acquire);
-  // CHECK: store atomic volatile i8 0, i8* @flag2 release
+  // CHECK: store atomic volatile i8 0, i8* @flag2 release, align 1
   __atomic_clear(&flag2, memory_order_release);
-  // CHECK: store atomic i8 0, i8* @flag1 seq_cst
+  // CHECK: store atomic i8 0, i8* @flag1 seq_cst, align 1
   __atomic_clear(&flag1, memory_order_seq_cst);
 }
 
@@ -463,19 +463,19 @@ void atomic_init_foo()
 // CHECK-LABEL: @failureOrder
 void failureOrder(_Atomic(int) *ptr, int *ptr2) {
   __c11_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed);
-  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acquire monotonic
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acquire monotonic, align 4
 
   __c11_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire);
-  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst acquire
+  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst acquire, align 4
 
   // Unknown ordering: conservatively pick strongest valid option (for now!).
   __atomic_compare_exchange(ptr2, ptr2, ptr2, 0, memory_order_acq_rel, *ptr2);
-  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acq_rel acquire
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} acq_rel acquire, align 4
 
   // Undefined behaviour: don't really care what that last ordering is so leave
   // it out:
   __atomic_compare_exchange_n(ptr2, ptr2, 43, 1, memory_order_seq_cst, 42);
-  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst
+  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} seq_cst {{.*}}, align 4
 }
 
 // CHECK-LABEL: @generalFailureOrder
@@ -516,35 +516,35 @@ void generalFailureOrder(_Atomic(int) *ptr, int *ptr2, int success, int fail) {
   // CHECK-NEXT: ]
 
   // CHECK: [[MONOTONIC_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} monotonic monotonic
+  // CHECK: cmpxchg {{.*}} monotonic monotonic, align
   // CHECK: br
 
   // CHECK: [[ACQUIRE_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} acquire monotonic
+  // CHECK: cmpxchg {{.*}} acquire monotonic, align
   // CHECK: br
 
   // CHECK: [[ACQUIRE_ACQUIRE]]
-  // CHECK: cmpxchg {{.*}} acquire acquire
+  // CHECK: cmpxchg {{.*}} acquire acquire, align
   // CHECK: br
 
   // CHECK: [[ACQREL_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} acq_rel monotonic
+  // CHECK: cmpxchg {{.*}} acq_rel monotonic, align
   // CHECK: br
 
   // CHECK: [[ACQREL_ACQUIRE]]
-  // CHECK: cmpxchg {{.*}} acq_rel acquire
+  // CHECK: cmpxchg {{.*}} acq_rel acquire, align
   // CHECK: br
 
   // CHECK: [[SEQCST_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} seq_cst monotonic
+  // CHECK: cmpxchg {{.*}} seq_cst monotonic, align
   // CHECK: br
 
   // CHECK: [[SEQCST_ACQUIRE]]
-  // CHECK: cmpxchg {{.*}} seq_cst acquire
+  // CHECK: cmpxchg {{.*}} seq_cst acquire, align
   // CHECK: br
 
   // CHECK: [[SEQCST_SEQCST]]
-  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align
   // CHECK: br
 }
 
@@ -555,12 +555,12 @@ void generalWeakness(int *ptr, int *ptr2, _Bool weak) {
 
   // CHECK: [[STRONG]]
   // CHECK-NOT: br
-  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align
   // CHECK: br
 
   // CHECK: [[WEAK]]
   // CHECK-NOT: br
-  // CHECK: cmpxchg weak {{.*}} seq_cst seq_cst
+  // CHECK: cmpxchg weak {{.*}} seq_cst seq_cst, align
   // CHECK: br
 }
 
@@ -569,24 +569,24 @@ void generalWeakness(int *ptr, int *ptr2, _Bool weak) {
 void EMIT_ALL_THE_THINGS(int *ptr, int *ptr2, int new, _Bool weak, int success, int fail) {
   __atomic_compare_exchange(ptr, ptr2, &new, weak, success, fail);
 
-  // CHECK: = cmpxchg {{.*}} monotonic monotonic
-  // CHECK: = cmpxchg weak {{.*}} monotonic monotonic
-  // CHECK: = cmpxchg {{.*}} acquire monotonic
-  // CHECK: = cmpxchg {{.*}} acquire acquire
-  // CHECK: = cmpxchg weak {{.*}} acquire monotonic
-  // CHECK: = cmpxchg weak {{.*}} acquire acquire
-  // CHECK: = cmpxchg {{.*}} release monotonic
-  // CHECK: = cmpxchg weak {{.*}} release monotonic
-  // CHECK: = cmpxchg {{.*}} acq_rel monotonic
-  // CHECK: = cmpxchg {{.*}} acq_rel acquire
-  // CHECK: = cmpxchg weak {{.*}} acq_rel monotonic
-  // CHECK: = cmpxchg weak {{.*}} acq_rel acquire
-  // CHECK: = cmpxchg {{.*}} seq_cst monotonic
-  // CHECK: = cmpxchg {{.*}} seq_cst acquire
-  // CHECK: = cmpxchg {{.*}} seq_cst seq_cst
-  // CHECK: = cmpxchg weak {{.*}} seq_cst monotonic
-  // CHECK: = cmpxchg weak {{.*}} seq_cst acquire
-  // CHECK: = cmpxchg weak {{.*}} seq_cst seq_cst
+  // CHECK: = cmpxchg {{.*}} monotonic monotonic, align
+  // CHECK: = cmpxchg weak {{.*}} monotonic monotonic, align
+  // CHECK: = cmpxchg {{.*}} acquire monotonic, align
+  // CHECK: = cmpxchg {{.*}} acquire acquire, align
+  // CHECK: = cmpxchg weak {{.*}} acquire monotonic, align
+  // CHECK: = cmpxchg weak {{.*}} acquire acquire, align
+  // CHECK: = cmpxchg {{.*}} release monotonic, align
+  // CHECK: = cmpxchg weak {{.*}} release monotonic, align
+  // CHECK: = cmpxchg {{.*}} acq_rel monotonic, align
+  // CHECK: = cmpxchg {{.*}} acq_rel acquire, align
+  // CHECK: = cmpxchg weak {{.*}} acq_rel monotonic, align
+  // CHECK: = cmpxchg weak {{.*}} acq_rel acquire, align
+  // CHECK: = cmpxchg {{.*}} seq_cst monotonic, align
+  // CHECK: = cmpxchg {{.*}} seq_cst acquire, align
+  // CHECK: = cmpxchg {{.*}} seq_cst seq_cst, align
+  // CHECK: = cmpxchg weak {{.*}} seq_cst monotonic, align
+  // CHECK: = cmpxchg weak {{.*}} seq_cst acquire, align
+  // CHECK: = cmpxchg weak {{.*}} seq_cst seq_cst, align
 }
 
 int PR21643() {
@@ -596,7 +596,7 @@ int PR21643() {
   // CHECK: %[[atomicdst:.*]] = alloca i32, align 4
   // CHECK: store i32 1, i32* %[[atomictmp]]
   // CHECK: %[[one:.*]] = load i32, i32* %[[atomictmp]], align 4
-  // CHECK: %[[old:.*]] = atomicrmw or i32 addrspace(257)* inttoptr (i32 776 to i32 addrspace(257)*), i32 %[[one]] monotonic
+  // CHECK: %[[old:.*]] = atomicrmw or i32 addrspace(257)* inttoptr (i32 776 to i32 addrspace(257)*), i32 %[[one]] monotonic, align 4
   // CHECK: %[[new:.*]] = or i32 %[[old]], %[[one]]
   // CHECK: store i32 %[[new]], i32* %[[atomicdst]], align 4
   // CHECK: %[[ret:.*]] = load i32, i32* %[[atomicdst]], align 4
@@ -609,7 +609,7 @@ int PR17306_1(volatile _Atomic(int) *i) {
   // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
   // CHECK-NEXT: store i32* %i, i32** %[[i_addr]]
   // CHECK-NEXT: %[[addr:.*]] = load i32*, i32** %[[i_addr]]
-  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] seq_cst
+  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] seq_cst, align 4
   // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
   // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
   // CHECK-NEXT: ret i32 %[[retval]]
@@ -628,7 +628,7 @@ int PR17306_2(volatile int *i, int value) {
   // CHECK-NEXT: %[[value:.*]] = load i32, i32* %[[value_addr]]
   // CHECK-NEXT: store i32 %[[value]], i32* %[[atomictmp]]
   // CHECK-NEXT: %[[value_lval:.*]] = load i32, i32* %[[atomictmp]]
-  // CHECK-NEXT: %[[old_val:.*]] = atomicrmw volatile add i32* %[[i_lval]], i32 %[[value_lval]] seq_cst
+  // CHECK-NEXT: %[[old_val:.*]] = atomicrmw volatile add i32* %[[i_lval]], i32 %[[value_lval]] seq_cst, align 4
   // CHECK-NEXT: %[[new_val:.*]] = add i32 %[[old_val]], %[[value_lval]]
   // CHECK-NEXT: store i32 %[[new_val]], i32* %[[atomicdst]]
   // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
@@ -651,41 +651,41 @@ void test_underaligned() {
 
   __attribute__((aligned)) struct Underaligned aligned_a, aligned_b, aligned_c;
 
-  // CHECK: load atomic
+  // CHECK: load atomic i64, {{.*}}, align 16
   __atomic_load(&aligned_a, &aligned_b, memory_order_seq_cst);
-  // CHECK: store atomic
+  // CHECK: store atomic i64 {{.*}}, align 16
   __atomic_store(&aligned_a, &aligned_b, memory_order_seq_cst);
-  // CHECK: atomicrmw xchg
+  // CHECK: atomicrmw xchg i64* {{.*}}, align 8
   __atomic_exchange(&aligned_a, &aligned_b, &aligned_c, memory_order_seq_cst);
-  // CHECK: cmpxchg weak
+  // CHECK: cmpxchg weak i64* {{.*}}, align 8
   __atomic_compare_exchange(&aligned_a, &aligned_b, &aligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
 }
 
 void test_c11_minmax(_Atomic(int) * si, _Atomic(unsigned) * ui, _Atomic(short) * ss, _Atomic(unsigned char) * uc, _Atomic(long long) * sll) {
   // CHECK-LABEL: @test_c11_minmax
 
-  // CHECK: atomicrmw max i32
+  // CHECK: atomicrmw max i32* {{.*}} acquire, align 4
   *si = __c11_atomic_fetch_max(si, 42, memory_order_acquire);
-  // CHECK: atomicrmw min i32
+  // CHECK: atomicrmw min i32* {{.*}} acquire, align 4
   *si = __c11_atomic_fetch_min(si, 42, memory_order_acquire);
-  // CHECK: atomicrmw umax i32
+  // CHECK: atomicrmw umax i32* {{.*}} acquire, align 4
   *ui = __c11_atomic_fetch_max(ui, 42, memory_order_acquire);
-  // CHECK: atomicrmw umin i32
+  // CHECK: atomicrmw umin i32* {{.*}} acquire, align 4
   *ui = __c11_atomic_fetch_min(ui, 42, memory_order_acquire);
 
-  // CHECK: atomicrmw max i16
+  // CHECK: atomicrmw max i16* {{.*}} acquire, align 2
   *ss = __c11_atomic_fetch_max(ss, 42, memory_order_acquire);
-  // CHECK: atomicrmw min i16
+  // CHECK: atomicrmw min i16* {{.*}} acquire, align 2
   *ss = __c11_atomic_fetch_min(ss, 42, memory_order_acquire);
 
-  // CHECK: atomicrmw umax i8
+  // CHECK: atomicrmw umax i8* {{.*}} acquire, align 1
   *uc = __c11_atomic_fetch_max(uc, 42, memory_order_acquire);
-  // CHECK: atomicrmw umin i8
+  // CHECK: atomicrmw umin i8* {{.*}} acquire, align 1
   *uc = __c11_atomic_fetch_min(uc, 42, memory_order_acquire);
 
-  // CHECK: atomicrmw max i64
+  // CHECK: atomicrmw max i64* {{.*}} acquire, align 8
   *sll = __c11_atomic_fetch_max(sll, 42, memory_order_acquire);
-  // CHECK: atomicrmw min i64
+  // CHECK: atomicrmw min i64* {{.*}} acquire, align 8
   *sll = __c11_atomic_fetch_min(sll, 42, memory_order_acquire);
 
 }
@@ -694,37 +694,37 @@ void test_minmax_postop(int *si, unsigned *ui, unsigned short *us, signed char *
   int val = 42;
   // CHECK-LABEL: @test_minmax_postop
 
-  // CHECK: [[OLD:%.*]] = atomicrmw max i32* [[PTR:%.*]], i32 [[RHS:%.*]] release
+  // CHECK: [[OLD:%.*]] = atomicrmw max i32* [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
   // CHECK: [[TST:%.*]] = icmp sgt i32 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
   // CHECK: store i32 [[NEW]], i32*
   *si = __atomic_max_fetch(si, 42, memory_order_release);
 
-  // CHECK: [[OLD:%.*]] = atomicrmw min i32* [[PTR:%.*]], i32 [[RHS:%.*]] release
+  // CHECK: [[OLD:%.*]] = atomicrmw min i32* [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
   // CHECK: [[TST:%.*]] = icmp slt i32 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
   // CHECK: store i32 [[NEW]], i32*
   *si = __atomic_min_fetch(si, 42, memory_order_release);
   
-  // CHECK: [[OLD:%.*]] = atomicrmw umax i32* [[PTR:%.*]], i32 [[RHS:%.*]] release
+  // CHECK: [[OLD:%.*]] = atomicrmw umax i32* [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
   // CHECK: [[TST:%.*]] = icmp ugt i32 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
   // CHECK: store i32 [[NEW]], i32*
   *ui = __atomic_max_fetch(ui, 42, memory_order_release);
 
-  // CHECK: [[OLD:%.*]] = atomicrmw umin i32* [[PTR:%.*]], i32 [[RHS:%.*]] release
+  // CHECK: [[OLD:%.*]] = atomicrmw umin i32* [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
   // CHECK: [[TST:%.*]] = icmp ult i32 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
   // CHECK: store i32 [[NEW]], i32*
   *ui = __atomic_min_fetch(ui, 42, memory_order_release);
 
-  // CHECK: [[OLD:%.*]] = atomicrmw umin i16* [[PTR:%.*]], i16 [[RHS:%.*]] release
+  // CHECK: [[OLD:%.*]] = atomicrmw umin i16* [[PTR:%.*]], i16 [[RHS:%.*]] release, align 2
   // CHECK: [[TST:%.*]] = icmp ult i16 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i16 [[OLD]], i16 [[RHS]]
   // CHECK: store i16 [[NEW]], i16*
   *us = __atomic_min_fetch(us, 42, memory_order_release);
 
-  // CHECK: [[OLD:%.*]] = atomicrmw min i8* [[PTR:%.*]], i8 [[RHS:%.*]] release
+  // CHECK: [[OLD:%.*]] = atomicrmw min i8* [[PTR:%.*]], i8 [[RHS:%.*]] release, align 1
   // CHECK: [[TST:%.*]] = icmp slt i8 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i8 [[OLD]], i8 [[RHS]]
   // CHECK: store i8 [[NEW]], i8*
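
For the failureOrder and EMIT_ALL_THE_THINGS hunks above: cmpxchg
carries two orderings, success then failure, with the alignment now
following both, and it yields a {value, success-flag} pair. A generic
sketch, not copied from the test:

    %pair = cmpxchg i32* %ptr, i32 %expected, i32 %desired acquire monotonic, align 4
    %old  = extractvalue { i32, i1 } %pair, 0   ; value observed in memory
    %ok   = extractvalue { i32, i1 } %pair, 1   ; true iff the swap happened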

diff --git a/clang/test/CodeGen/atomic.c b/clang/test/CodeGen/atomic.c
index 4db3c8e6d69c..4eb21669ab3c 100644
--- a/clang/test/CodeGen/atomic.c
+++ b/clang/test/CodeGen/atomic.c
@@ -11,80 +11,80 @@ int atomic(void) {
   int* ptrval;
 
   old = __sync_fetch_and_add(&val, 1);
-  // CHECK: atomicrmw add i32* %val, i32 1 seq_cst
+  // CHECK: atomicrmw add i32* %val, i32 1 seq_cst, align 4
   
   old = __sync_fetch_and_sub(&valc, 2);
-  // CHECK: atomicrmw sub i8* %valc, i8 2 seq_cst
+  // CHECK: atomicrmw sub i8* %valc, i8 2 seq_cst, align 1
   
   old = __sync_fetch_and_min(&val, 3);
-  // CHECK: atomicrmw min i32* %val, i32 3 seq_cst
+  // CHECK: atomicrmw min i32* %val, i32 3 seq_cst, align 4
   
   old = __sync_fetch_and_max(&val, 4);
-  // CHECK: atomicrmw max i32* %val, i32 4 seq_cst
+  // CHECK: atomicrmw max i32* %val, i32 4 seq_cst, align 4
   
   old = __sync_fetch_and_umin(&uval, 5u);
-  // CHECK: atomicrmw umin i32* %uval, i32 5 seq_cst
+  // CHECK: atomicrmw umin i32* %uval, i32 5 seq_cst, align 4
   
   old = __sync_fetch_and_umax(&uval, 6u);
-  // CHECK: atomicrmw umax i32* %uval, i32 6 seq_cst
+  // CHECK: atomicrmw umax i32* %uval, i32 6 seq_cst, align 4
   
   old = __sync_lock_test_and_set(&val, 7);
-  // CHECK: atomicrmw xchg i32* %val, i32 7 seq_cst
+  // CHECK: atomicrmw xchg i32* %val, i32 7 seq_cst, align 4
   
   old = __sync_swap(&val, 8);
-  // CHECK: atomicrmw xchg i32* %val, i32 8 seq_cst
+  // CHECK: atomicrmw xchg i32* %val, i32 8 seq_cst, align 4
   
   old = __sync_val_compare_and_swap(&val, 4, 1976);
-  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 seq_cst
+  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 seq_cst seq_cst, align 4
   // CHECK: extractvalue { i32, i1 } [[PAIR]], 0
 
   old = __sync_bool_compare_and_swap(&val, 4, 1976);
-  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 seq_cst
+  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* %val, i32 4, i32 1976 seq_cst seq_cst, align 4
   // CHECK: extractvalue { i32, i1 } [[PAIR]], 1
 
   old = __sync_fetch_and_and(&val, 0x9);
-  // CHECK: atomicrmw and i32* %val, i32 9 seq_cst
+  // CHECK: atomicrmw and i32* %val, i32 9 seq_cst, align 4
 
   old = __sync_fetch_and_or(&val, 0xa);
-  // CHECK: atomicrmw or i32* %val, i32 10 seq_cst
+  // CHECK: atomicrmw or i32* %val, i32 10 seq_cst, align 4
 
   old = __sync_fetch_and_xor(&val, 0xb);
-  // CHECK: atomicrmw xor i32* %val, i32 11 seq_cst
+  // CHECK: atomicrmw xor i32* %val, i32 11 seq_cst, align 4
  
   old = __sync_fetch_and_nand(&val, 0xc);
-  // CHECK: atomicrmw nand i32* %val, i32 12 seq_cst
+  // CHECK: atomicrmw nand i32* %val, i32 12 seq_cst, align 4
  
   old = __sync_add_and_fetch(&val, 1);
-  // CHECK: atomicrmw add i32* %val, i32 1 seq_cst
+  // CHECK: atomicrmw add i32* %val, i32 1 seq_cst, align 4
 
   old = __sync_sub_and_fetch(&val, 2);
-  // CHECK: atomicrmw sub i32* %val, i32 2 seq_cst
+  // CHECK: atomicrmw sub i32* %val, i32 2 seq_cst, align 4
 
   old = __sync_and_and_fetch(&valc, 3);
-  // CHECK: atomicrmw and i8* %valc, i8 3 seq_cst
+  // CHECK: atomicrmw and i8* %valc, i8 3 seq_cst, align 1
 
   old = __sync_or_and_fetch(&valc, 4);
-  // CHECK: atomicrmw or i8* %valc, i8 4 seq_cst
+  // CHECK: atomicrmw or i8* %valc, i8 4 seq_cst, align 1
 
   old = __sync_xor_and_fetch(&valc, 5);
-  // CHECK: atomicrmw xor i8* %valc, i8 5 seq_cst  
+  // CHECK: atomicrmw xor i8* %valc, i8 5 seq_cst, align 1
  
   old = __sync_nand_and_fetch(&valc, 6);
-  // CHECK: atomicrmw nand i8* %valc, i8 6 seq_cst  
+  // CHECK: atomicrmw nand i8* %valc, i8 6 seq_cst, align 1
  
   __sync_val_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* null, i32 0, i32 0 seq_cst
+  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i32* null, i32 0, i32 0 seq_cst seq_cst, align 4
   // CHECK: extractvalue { i32, i1 } [[PAIR]], 0
 
   if ( __sync_val_compare_and_swap(&valb, 0, 1)) {
-    // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i8* %valb, i8 0, i8 1 seq_cst
+    // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg i8* %valb, i8 0, i8 1 seq_cst seq_cst, align 1
     // CHECK: [[VAL:%[a-z0-9_.]+]] = extractvalue { i8, i1 } [[PAIR]], 0
     // CHECK: trunc i8 [[VAL]] to i1
     old = 42;
   }
   
   __sync_bool_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: cmpxchg i32* null, i32 0, i32 0 seq_cst
+  // CHECK: cmpxchg i32* null, i32 0, i32 0 seq_cst seq_cst, align 4
   
   __sync_lock_release(&val);
   // CHECK: store atomic i32 0, {{.*}} release, align 4
@@ -110,11 +110,11 @@ void release_return(int *lock) {
 // CHECK: @addrspace
 void addrspace(int  __attribute__((address_space(256))) * P) {
   __sync_bool_compare_and_swap(P, 0, 1);
-  // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 seq_cst  
+  // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 seq_cst seq_cst, align 4
 
   __sync_val_compare_and_swap(P, 0, 1);
-  // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 seq_cst  
+  // CHECK: cmpxchg i32 addrspace(256)*{{.*}}, i32 0, i32 1 seq_cst seq_cst, align 4
 
   __sync_xor_and_fetch(P, 123);
-  // CHECK: atomicrmw xor i32 addrspace(256)*{{.*}}, i32 123 seq_cst  
+  // CHECK: atomicrmw xor i32 addrspace(256)*{{.*}}, i32 123 seq_cst, align 4
 }

diff --git a/clang/test/CodeGen/atomic_ops.c b/clang/test/CodeGen/atomic_ops.c
index 79a1e5dba780..f3754888a1c0 100644
--- a/clang/test/CodeGen/atomic_ops.c
+++ b/clang/test/CodeGen/atomic_ops.c
@@ -10,17 +10,17 @@ void foo(int x)
   // Check that multiply / divides on atomics produce a cmpxchg loop
   i *= 2;
   // NATIVE: mul nsw i32
-  // NATIVE: cmpxchg i32*
+  // NATIVE: cmpxchg i32* {{.*}} seq_cst, align 4
   // LIBCALL: mul nsw i32
   // LIBCALL: i1 @__atomic_compare_exchange(i32 4,
   i /= 2;
   // NATIVE: sdiv i32
-  // NATIVE: cmpxchg i32*
+  // NATIVE: cmpxchg i32* {{.*}} seq_cst, align 4
   // LIBCALL: sdiv i32
   // LIBCALL: i1 @__atomic_compare_exchange(i32 4,
   j /= x;
   // NATIVE: sdiv i32
-  // NATIVE: cmpxchg i16*
+  // NATIVE: cmpxchg i16* {{.*}} seq_cst, align 2
   // LIBCALL: sdiv i32
   // LIBCALL: i1 @__atomic_compare_exchange(i32 2,
 
@@ -33,7 +33,7 @@ extern _Atomic _Bool b;
 
 _Bool bar() {
 // NATIVE-LABEL: @bar
-// NATIVE: %[[load:.*]] = load atomic i8, i8* @b seq_cst
+// NATIVE: %[[load:.*]] = load atomic i8, i8* @b seq_cst, align 1
 // NATIVE: %[[tobool:.*]] = trunc i8 %[[load]] to i1
 // NATIVE: ret i1 %[[tobool]]
 // LIBCALL-LABEL: @bar
@@ -49,7 +49,7 @@ extern _Atomic(_Complex int) x;
 
 void baz(int y) {
 // NATIVE-LABEL: @baz
-// NATIVE: store atomic
+// NATIVE: store atomic i64 {{.*}} seq_cst, align 8
 // LIBCALL-LABEL: @baz
 // LIBCALL: call void @__atomic_store
 
@@ -60,7 +60,7 @@ void baz(int y) {
 
 _Atomic(int) compound_add(_Atomic(int) in) {
 // CHECK-LABEL: @compound_add
-// CHECK: [[OLD:%.*]] = atomicrmw add i32* {{.*}}, i32 5 seq_cst
+// CHECK: [[OLD:%.*]] = atomicrmw add i32* {{.*}}, i32 5 seq_cst, align 4
 // CHECK: [[NEW:%.*]] = add i32 [[OLD]], 5
 // CHECK: ret i32 [[NEW]]
 
@@ -69,7 +69,7 @@ _Atomic(int) compound_add(_Atomic(int) in) {
 
 _Atomic(int) compound_sub(_Atomic(int) in) {
 // CHECK-LABEL: @compound_sub
-// CHECK: [[OLD:%.*]] = atomicrmw sub i32* {{.*}}, i32 5 seq_cst
+// CHECK: [[OLD:%.*]] = atomicrmw sub i32* {{.*}}, i32 5 seq_cst, align 4
 // CHECK: [[NEW:%.*]] = sub i32 [[OLD]], 5
 // CHECK: ret i32 [[NEW]]
 
@@ -78,7 +78,7 @@ _Atomic(int) compound_sub(_Atomic(int) in) {
 
 _Atomic(int) compound_xor(_Atomic(int) in) {
 // CHECK-LABEL: @compound_xor
-// CHECK: [[OLD:%.*]] = atomicrmw xor i32* {{.*}}, i32 5 seq_cst
+// CHECK: [[OLD:%.*]] = atomicrmw xor i32* {{.*}}, i32 5 seq_cst, align 4
 // CHECK: [[NEW:%.*]] = xor i32 [[OLD]], 5
 // CHECK: ret i32 [[NEW]]
 
@@ -87,7 +87,7 @@ _Atomic(int) compound_xor(_Atomic(int) in) {
 
 _Atomic(int) compound_or(_Atomic(int) in) {
 // CHECK-LABEL: @compound_or
-// CHECK: [[OLD:%.*]] = atomicrmw or i32* {{.*}}, i32 5 seq_cst
+// CHECK: [[OLD:%.*]] = atomicrmw or i32* {{.*}}, i32 5 seq_cst, align 4
 // CHECK: [[NEW:%.*]] = or i32 [[OLD]], 5
 // CHECK: ret i32 [[NEW]]
 
@@ -96,7 +96,7 @@ _Atomic(int) compound_or(_Atomic(int) in) {
 
 _Atomic(int) compound_and(_Atomic(int) in) {
 // CHECK-LABEL: @compound_and
-// CHECK: [[OLD:%.*]] = atomicrmw and i32* {{.*}}, i32 5 seq_cst
+// CHECK: [[OLD:%.*]] = atomicrmw and i32* {{.*}}, i32 5 seq_cst, align 4
 // CHECK: [[NEW:%.*]] = and i32 [[OLD]], 5
 // CHECK: ret i32 [[NEW]]
 
@@ -105,7 +105,7 @@ _Atomic(int) compound_and(_Atomic(int) in) {
 
 _Atomic(int) compound_mul(_Atomic(int) in) {
 // NATIVE-LABEL: @compound_mul
-// NATIVE: cmpxchg i32* {{%.*}}, i32 {{%.*}}, i32 [[NEW:%.*]] seq_cst seq_cst
+// NATIVE: cmpxchg i32* {{%.*}}, i32 {{%.*}}, i32 [[NEW:%.*]] seq_cst seq_cst, align 4
 // NATIVE: ret i32 [[NEW]]
 // LIBCALL-LABEL: @compound_mul
 // LIBCALL: i1 @__atomic_compare_exchange(i32 4,
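
For reference, the "cmpxchg loop" those NATIVE lines refer to is a plain
compare-exchange retry loop. A minimal C sketch using <stdatomic.h> (the
function name is illustrative; this mirrors the lowering rather than being
the code clang emits):

  #include <stdatomic.h>

  void atomic_mul2(_Atomic int *i) {
    int old = atomic_load(i);
    int desired;
    do {
      desired = old * 2;
      // On failure, the CAS reloads the current value into 'old', so the
      // multiplication is retried against what another thread stored.
    } while (!atomic_compare_exchange_weak(i, &old, desired));
  }

Each CAS attempt is the seq_cst/seq_cst cmpxchg that the updated NATIVE
lines now match in full, including its alignment.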

diff --git a/clang/test/CodeGen/atomics-inlining.c b/clang/test/CodeGen/atomics-inlining.c
index aa1ebef8d815..1931a19c3f31 100644
--- a/clang/test/CodeGen/atomics-inlining.c
+++ b/clang/test/CodeGen/atomics-inlining.c
@@ -49,60 +49,60 @@ void test1(void) {
 // ARM: call{{.*}} void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // PPC32-LABEL: define{{.*}} void @test1
-// PPC32: = load atomic i8, i8* @c1 seq_cst
-// PPC32: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// PPC32: = load atomic i16, i16* @s1 seq_cst
-// PPC32: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// PPC32: = load atomic i32, i32* @i1 seq_cst
-// PPC32: store atomic i32 {{.*}}, i32* @i1 seq_cst
+// PPC32: = load atomic i8, i8* @c1 seq_cst, align 1
+// PPC32: store atomic i8 {{.*}}, i8* @c1 seq_cst, align 1
+// PPC32: = load atomic i16, i16* @s1 seq_cst, align 2
+// PPC32: store atomic i16 {{.*}}, i16* @s1 seq_cst, align 2
+// PPC32: = load atomic i32, i32* @i1 seq_cst, align 4
+// PPC32: store atomic i32 {{.*}}, i32* @i1 seq_cst, align 4
 // PPC32: = call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
 // PPC32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
 // PPC32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 // PPC32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // PPC64-LABEL: define{{.*}} void @test1
-// PPC64: = load atomic i8, i8* @c1 seq_cst
-// PPC64: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// PPC64: = load atomic i16, i16* @s1 seq_cst
-// PPC64: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// PPC64: = load atomic i32, i32* @i1 seq_cst
-// PPC64: store atomic i32 {{.*}}, i32* @i1 seq_cst
-// PPC64: = load atomic i64, i64* @ll1 seq_cst
-// PPC64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
+// PPC64: = load atomic i8, i8* @c1 seq_cst, align 1
+// PPC64: store atomic i8 {{.*}}, i8* @c1 seq_cst, align 1
+// PPC64: = load atomic i16, i16* @s1 seq_cst, align 2
+// PPC64: store atomic i16 {{.*}}, i16* @s1 seq_cst, align 2
+// PPC64: = load atomic i32, i32* @i1 seq_cst, align 4
+// PPC64: store atomic i32 {{.*}}, i32* @i1 seq_cst, align 4
+// PPC64: = load atomic i64, i64* @ll1 seq_cst, align 8
+// PPC64: store atomic i64 {{.*}}, i64* @ll1 seq_cst, align 8
 // PPC64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 // PPC64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // MIPS32-LABEL: define{{.*}} void @test1
-// MIPS32: = load atomic i8, i8* @c1 seq_cst
-// MIPS32: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// MIPS32: = load atomic i16, i16* @s1 seq_cst
-// MIPS32: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// MIPS32: = load atomic i32, i32* @i1 seq_cst
-// MIPS32: store atomic i32 {{.*}}, i32* @i1 seq_cst
+// MIPS32: = load atomic i8, i8* @c1 seq_cst, align 1
+// MIPS32: store atomic i8 {{.*}}, i8* @c1 seq_cst, align 1
+// MIPS32: = load atomic i16, i16* @s1 seq_cst, align 2
+// MIPS32: store atomic i16 {{.*}}, i16* @s1 seq_cst, align 2
+// MIPS32: = load atomic i32, i32* @i1 seq_cst, align 4
+// MIPS32: store atomic i32 {{.*}}, i32* @i1 seq_cst, align 4
 // MIPS32: call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
 // MIPS32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
 // MIPS32: call void @__atomic_load(i32 signext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 // MIPS32: call void @__atomic_store(i32 signext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // MIPS64-LABEL: define{{.*}} void @test1
-// MIPS64: = load atomic i8, i8* @c1 seq_cst
-// MIPS64: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// MIPS64: = load atomic i16, i16* @s1 seq_cst
-// MIPS64: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// MIPS64: = load atomic i32, i32* @i1 seq_cst
-// MIPS64: store atomic i32 {{.*}}, i32* @i1 seq_cst
-// MIPS64: = load atomic i64, i64* @ll1 seq_cst
-// MIPS64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
+// MIPS64: = load atomic i8, i8* @c1 seq_cst, align 1
+// MIPS64: store atomic i8 {{.*}}, i8* @c1 seq_cst, align 1
+// MIPS64: = load atomic i16, i16* @s1 seq_cst, align 2
+// MIPS64: store atomic i16 {{.*}}, i16* @s1 seq_cst, align 2
+// MIPS64: = load atomic i32, i32* @i1 seq_cst, align 4
+// MIPS64: store atomic i32 {{.*}}, i32* @i1 seq_cst, align 4
+// MIPS64: = load atomic i64, i64* @ll1 seq_cst, align 8
+// MIPS64: store atomic i64 {{.*}}, i64* @ll1 seq_cst, align 8
 // MIPS64: call void @__atomic_load(i64 zeroext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0)
 // MIPS64: call void @__atomic_store(i64 zeroext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // SPARC-LABEL: define{{.*}} void @test1
-// SPARC: = load atomic i8, i8* @c1 seq_cst
-// SPARC: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// SPARC: = load atomic i16, i16* @s1 seq_cst
-// SPARC: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// SPARC: = load atomic i32, i32* @i1 seq_cst
-// SPARC: store atomic i32 {{.*}}, i32* @i1 seq_cst
+// SPARC: = load atomic i8, i8* @c1 seq_cst, align 1
+// SPARC: store atomic i8 {{.*}}, i8* @c1 seq_cst, align 1
+// SPARC: = load atomic i16, i16* @s1 seq_cst, align 2
+// SPARC: store atomic i16 {{.*}}, i16* @s1 seq_cst, align 2
+// SPARC: = load atomic i32, i32* @i1 seq_cst, align 4
+// SPARC: store atomic i32 {{.*}}, i32* @i1 seq_cst, align 4
 // SPARCV8: call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
 // SPARCV8: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
 // SPARCV9: load atomic i64, i64* @ll1 seq_cst, align 8

diff --git a/clang/test/CodeGen/big-atomic-ops.c b/clang/test/CodeGen/big-atomic-ops.c
index b06302f73f96..c584ffcf7cf7 100644
--- a/clang/test/CodeGen/big-atomic-ops.c
+++ b/clang/test/CodeGen/big-atomic-ops.c
@@ -16,13 +16,13 @@ typedef enum memory_order {
 
 int fi1(_Atomic(int) *i) {
   // CHECK: @fi1
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   return __c11_atomic_load(i, memory_order_seq_cst);
 }
 
 int fi1a(int *i) {
   // CHECK: @fi1a
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   int v;
   __atomic_load(i, &v, memory_order_seq_cst);
   return v;
@@ -30,60 +30,60 @@ int fi1a(int *i) {
 
 int fi1b(int *i) {
   // CHECK: @fi1b
-  // CHECK: load atomic i32, i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
   return __atomic_load_n(i, memory_order_seq_cst);
 }
 
 void fi2(_Atomic(int) *i) {
   // CHECK: @fi2
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   __c11_atomic_store(i, 1, memory_order_seq_cst);
 }
 
 void fi2a(int *i) {
   // CHECK: @fi2a
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   int v = 1;
   __atomic_store(i, &v, memory_order_seq_cst);
 }
 
 void fi2b(int *i) {
   // CHECK: @fi2b
-  // CHECK: store atomic i32 {{.*}} seq_cst
+  // CHECK: store atomic i32 {{.*}} seq_cst, align 4
   __atomic_store_n(i, 1, memory_order_seq_cst);
 }
 
 int fi3(_Atomic(int) *i) {
   // CHECK: @fi3
-  // CHECK: atomicrmw and
+  // CHECK: atomicrmw and {{.*}} seq_cst, align 4
   // CHECK-NOT: and
   return __c11_atomic_fetch_and(i, 1, memory_order_seq_cst);
 }
 
 int fi3a(int *i) {
   // CHECK: @fi3a
-  // CHECK: atomicrmw xor
+  // CHECK: atomicrmw xor {{.*}} seq_cst, align 4
   // CHECK-NOT: xor
   return __atomic_fetch_xor(i, 1, memory_order_seq_cst);
 }
 
 int fi3b(int *i) {
   // CHECK: @fi3b
-  // CHECK: atomicrmw add
+  // CHECK: atomicrmw add {{.*}} seq_cst, align 4
   // CHECK: add
   return __atomic_add_fetch(i, 1, memory_order_seq_cst);
 }
 
 int fi3c(int *i) {
   // CHECK: @fi3c
-  // CHECK: atomicrmw nand
+  // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
   // CHECK-NOT: and
   return __atomic_fetch_nand(i, 1, memory_order_seq_cst);
 }
 
 int fi3d(int *i) {
   // CHECK: @fi3d
-  // CHECK: atomicrmw nand
+  // CHECK: atomicrmw nand {{.*}} seq_cst, align 4
   // CHECK: and
   // CHECK: xor
   return __atomic_nand_fetch(i, 1, memory_order_seq_cst);
@@ -91,14 +91,14 @@ int fi3d(int *i) {
 
 _Bool fi4(_Atomic(int) *i) {
   // CHECK: @fi4
-  // CHECK: cmpxchg i32*
+  // CHECK: cmpxchg i32* {{.*}} acquire acquire, align 4
   int cmp = 0;
   return __c11_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire);
 }
 
 _Bool fi4a(int *i) {
   // CHECK: @fi4
-  // CHECK: cmpxchg i32*
+  // CHECK: cmpxchg i32* {{.*}} acquire acquire, align 4
   int cmp = 0;
   int desired = 1;
   return __atomic_compare_exchange(i, &cmp, &desired, 0, memory_order_acquire, memory_order_acquire);
@@ -106,20 +106,20 @@ _Bool fi4a(int *i) {
 
 _Bool fi4b(int *i) {
   // CHECK: @fi4
-  // CHECK: cmpxchg weak i32*
+  // CHECK: cmpxchg weak i32* {{.*}} acquire acquire, align 4
   int cmp = 0;
   return __atomic_compare_exchange_n(i, &cmp, 1, 1, memory_order_acquire, memory_order_acquire);
 }
 
 float ff1(_Atomic(float) *d) {
   // CHECK: @ff1
-  // CHECK: load atomic i32, i32* {{.*}} monotonic
+  // CHECK: load atomic i32, i32* {{.*}} monotonic, align 4
   return __c11_atomic_load(d, memory_order_relaxed);
 }
 
 void ff2(_Atomic(float) *d) {
   // CHECK: @ff2
-  // CHECK: store atomic i32 {{.*}} release
+  // CHECK: store atomic i32 {{.*}} release, align 4
   __c11_atomic_store(d, 1, memory_order_release);
 }
 
@@ -129,41 +129,41 @@ float ff3(_Atomic(float) *d) {
 
 int* fp1(_Atomic(int*) *p) {
   // CHECK: @fp1
-  // CHECK: load atomic i64, i64* {{.*}} seq_cst
+  // CHECK: load atomic i64, i64* {{.*}} seq_cst, align 8
   return __c11_atomic_load(p, memory_order_seq_cst);
 }
 
 int* fp2(_Atomic(int*) *p) {
   // CHECK: @fp2
   // CHECK: store i64 4
-  // CHECK: atomicrmw add {{.*}} monotonic
+  // CHECK: atomicrmw add {{.*}} monotonic, align 8
   return __c11_atomic_fetch_add(p, 1, memory_order_relaxed);
 }
 
 int *fp2a(int **p) {
   // CHECK: @fp2a
   // CHECK: store i64 4
-  // CHECK: atomicrmw sub {{.*}} monotonic
+  // CHECK: atomicrmw sub {{.*}} monotonic, align 8
   // Note, the GNU builtins do not multiply by sizeof(T)!
   return __atomic_fetch_sub(p, 4, memory_order_relaxed);
 }
 
 _Complex float fc(_Atomic(_Complex float) *c) {
   // CHECK: @fc
-  // CHECK: atomicrmw xchg i64*
+  // CHECK: atomicrmw xchg i64* {{.*}} seq_cst, align 8
   return __c11_atomic_exchange(c, 2, memory_order_seq_cst);
 }
 
 typedef struct X { int x; } X;
 X fs(_Atomic(X) *c) {
   // CHECK: @fs
-  // CHECK: atomicrmw xchg i32*
+  // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
   return __c11_atomic_exchange(c, (X){2}, memory_order_seq_cst);
 }
 
 X fsa(X *c, X *d) {
   // CHECK: @fsa
-  // CHECK: atomicrmw xchg i32*
+  // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
   X ret;
   __atomic_exchange(c, d, &ret, memory_order_seq_cst);
   return ret;
@@ -171,20 +171,20 @@ X fsa(X *c, X *d) {
 
 _Bool fsb(_Bool *c) {
   // CHECK: @fsb
-  // CHECK: atomicrmw xchg i8*
+  // CHECK: atomicrmw xchg i8* {{.*}} seq_cst, align 1
   return __atomic_exchange_n(c, 1, memory_order_seq_cst);
 }
 
 char flag1;
 volatile char flag2;
 void test_and_set() {
-  // CHECK: atomicrmw xchg i8* @flag1, i8 1 seq_cst
+  // CHECK: atomicrmw xchg i8* @flag1, i8 1 seq_cst, align 1
   __atomic_test_and_set(&flag1, memory_order_seq_cst);
-  // CHECK: atomicrmw volatile xchg i8* @flag2, i8 1 acquire
+  // CHECK: atomicrmw volatile xchg i8* @flag2, i8 1 acquire, align 1
   __atomic_test_and_set(&flag2, memory_order_acquire);
-  // CHECK: store atomic volatile i8 0, i8* @flag2 release
+  // CHECK: store atomic volatile i8 0, i8* @flag2 release, align 1
   __atomic_clear(&flag2, memory_order_release);
-  // CHECK: store atomic i8 0, i8* @flag1 seq_cst
+  // CHECK: store atomic i8 0, i8* @flag1 seq_cst, align 1
   __atomic_clear(&flag1, memory_order_seq_cst);
 }
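
On the fp2/fp2a note above: the C11 builtin scales its operand by the
pointee size, while the GNU builtin takes a raw byte count. A minimal
sketch, assuming a 64-bit target with 4-byte int (helper names are
illustrative):

  #include <stdatomic.h>

  int *advance_c11(_Atomic(int *) *p) {
    // Operand scaled by sizeof(int): steps one element, hence 'store i64 4'.
    return __c11_atomic_fetch_add(p, 1, memory_order_relaxed);
  }

  int *advance_gnu(int **p) {
    // Operand is a raw byte count: pass sizeof(int) to step one element.
    return __atomic_fetch_add(p, sizeof(int), __ATOMIC_RELAXED);
  }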
 

diff --git a/clang/test/CodeGen/bittest-intrin.c b/clang/test/CodeGen/bittest-intrin.c
index 25e367384d78..0add34d61e6f 100644
--- a/clang/test/CodeGen/bittest-intrin.c
+++ b/clang/test/CodeGen/bittest-intrin.c
@@ -107,7 +107,7 @@ void test_arm(long *base, long idx) {
 // ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
 // ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
 // ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
-// ARM: %[[BYTE:[^ ]*]] = atomicrmw and i8* %[[BYTEADDR]], i8 %[[NOTMASK]] seq_cst
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw and i8* %[[BYTEADDR]], i8 %[[NOTMASK]] seq_cst, align 1
 // ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
 // ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
 // ARM: store volatile i8 %[[RES]], i8* @sink, align 1
@@ -118,7 +118,7 @@ void test_arm(long *base, long idx) {
 // ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
 // ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
 // ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
-// ARM: %[[BYTE:[^ ]*]] = atomicrmw or i8* %[[BYTEADDR]], i8 %[[MASK]] seq_cst
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw or i8* %[[BYTEADDR]], i8 %[[MASK]] seq_cst, align 1
 // ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
 // ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
 // ARM: store volatile i8 %[[RES]], i8* @sink, align 1
@@ -127,9 +127,9 @@ void test_arm(long *base, long idx) {
 // Just look for the atomicrmw instructions.
 
 // ARM-LABEL: define dso_local {{.*}}void @test_arm(i32* %base, i32 %idx)
-// ARM: atomicrmw and i8* %{{.*}}, i8 {{.*}} acquire
-// ARM: atomicrmw and i8* %{{.*}}, i8 {{.*}} release
-// ARM: atomicrmw and i8* %{{.*}}, i8 {{.*}} monotonic
-// ARM: atomicrmw or i8* %{{.*}}, i8 {{.*}} acquire
-// ARM: atomicrmw or i8* %{{.*}}, i8 {{.*}} release
-// ARM: atomicrmw or i8* %{{.*}}, i8 {{.*}} monotonic
+// ARM: atomicrmw and i8* %{{.*}}, i8 {{.*}} acquire, align 1
+// ARM: atomicrmw and i8* %{{.*}}, i8 {{.*}} release, align 1
+// ARM: atomicrmw and i8* %{{.*}}, i8 {{.*}} monotonic, align 1
+// ARM: atomicrmw or i8* %{{.*}}, i8 {{.*}} acquire, align 1
+// ARM: atomicrmw or i8* %{{.*}}, i8 {{.*}} release, align 1
+// ARM: atomicrmw or i8* %{{.*}}, i8 {{.*}} monotonic, align 1

diff --git a/clang/test/CodeGen/builtins-nvptx-ptx50.cu b/clang/test/CodeGen/builtins-nvptx-ptx50.cu
index 4436ff523cf6..9d92c63416e3 100644
--- a/clang/test/CodeGen/builtins-nvptx-ptx50.cu
+++ b/clang/test/CodeGen/builtins-nvptx-ptx50.cu
@@ -17,7 +17,7 @@
 
 // CHECK-LABEL: test_fn
 __device__ void test_fn(double d, double* double_ptr) {
-  // CHECK: atomicrmw fadd double
+  // CHECK: atomicrmw fadd double* {{.*}} seq_cst, align 8
   // expected-error@+1 {{'__nvvm_atom_add_gen_d' needs target feature sm_60}}
   __nvvm_atom_add_gen_d(double_ptr, d);
 }

diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index 1f7a8c62ac2c..913da5d7b73b 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -201,85 +201,85 @@ __shared__ long long sll;
 __device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip,
                           int i, unsigned int *uip, unsigned ui, long *lp,
                           long l, long long *llp, long long ll) {
-  // CHECK: atomicrmw add
+  // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
   __nvvm_atom_add_gen_i(ip, i);
-  // CHECK: atomicrmw add
+  // CHECK: atomicrmw add i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_add_gen_l(&dl, l);
-  // CHECK: atomicrmw add
+  // CHECK: atomicrmw add i64* {{.*}} seq_cst, align 8
   __nvvm_atom_add_gen_ll(&sll, ll);
 
-  // CHECK: atomicrmw sub
+  // CHECK: atomicrmw sub i32* {{.*}} seq_cst, align 4
   __nvvm_atom_sub_gen_i(ip, i);
-  // CHECK: atomicrmw sub
+  // CHECK: atomicrmw sub i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_sub_gen_l(&dl, l);
-  // CHECK: atomicrmw sub
+  // CHECK: atomicrmw sub i64* {{.*}} seq_cst, align 8
   __nvvm_atom_sub_gen_ll(&sll, ll);
 
-  // CHECK: atomicrmw and
+  // CHECK: atomicrmw and i32* {{.*}} seq_cst, align 4
   __nvvm_atom_and_gen_i(ip, i);
-  // CHECK: atomicrmw and
+  // CHECK: atomicrmw and i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_and_gen_l(&dl, l);
-  // CHECK: atomicrmw and
+  // CHECK: atomicrmw and i64* {{.*}} seq_cst, align 8
   __nvvm_atom_and_gen_ll(&sll, ll);
 
-  // CHECK: atomicrmw or
+  // CHECK: atomicrmw or i32* {{.*}} seq_cst, align 4
   __nvvm_atom_or_gen_i(ip, i);
-  // CHECK: atomicrmw or
+  // CHECK: atomicrmw or i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_or_gen_l(&dl, l);
-  // CHECK: atomicrmw or
+  // CHECK: atomicrmw or i64* {{.*}} seq_cst, align 8
   __nvvm_atom_or_gen_ll(&sll, ll);
 
-  // CHECK: atomicrmw xor
+  // CHECK: atomicrmw xor i32* {{.*}} seq_cst, align 4
   __nvvm_atom_xor_gen_i(ip, i);
-  // CHECK: atomicrmw xor
+  // CHECK: atomicrmw xor i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_xor_gen_l(&dl, l);
-  // CHECK: atomicrmw xor
+  // CHECK: atomicrmw xor i64* {{.*}} seq_cst, align 8
   __nvvm_atom_xor_gen_ll(&sll, ll);
 
-  // CHECK: atomicrmw xchg
+  // CHECK: atomicrmw xchg i32* {{.*}} seq_cst, align 4
   __nvvm_atom_xchg_gen_i(ip, i);
-  // CHECK: atomicrmw xchg
+  // CHECK: atomicrmw xchg i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_xchg_gen_l(&dl, l);
-  // CHECK: atomicrmw xchg
+  // CHECK: atomicrmw xchg i64* {{.*}} seq_cst, align 8
   __nvvm_atom_xchg_gen_ll(&sll, ll);
 
-  // CHECK: atomicrmw max i32*
+  // CHECK: atomicrmw max i32* {{.*}} seq_cst, align 4
   __nvvm_atom_max_gen_i(ip, i);
-  // CHECK: atomicrmw umax i32*
+  // CHECK: atomicrmw umax i32* {{.*}} seq_cst, align 4
   __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
-  // CHECK: atomicrmw max
+  // CHECK: atomicrmw max i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_max_gen_l(&dl, l);
-  // CHECK: atomicrmw umax
+  // CHECK: atomicrmw umax i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
-  // CHECK: atomicrmw max i64*
+  // CHECK: atomicrmw max i64* {{.*}} seq_cst, align 8
   __nvvm_atom_max_gen_ll(&sll, ll);
-  // CHECK: atomicrmw umax i64*
+  // CHECK: atomicrmw umax i64* {{.*}} seq_cst, align 8
   __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);
 
-  // CHECK: atomicrmw min i32*
+  // CHECK: atomicrmw min i32* {{.*}} seq_cst, align 4
   __nvvm_atom_min_gen_i(ip, i);
-  // CHECK: atomicrmw umin i32*
+  // CHECK: atomicrmw umin i32* {{.*}} seq_cst, align 4
   __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
-  // CHECK: atomicrmw min
+  // CHECK: atomicrmw min i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_min_gen_l(&dl, l);
-  // CHECK: atomicrmw umin
+  // CHECK: atomicrmw umin i{{32|64}}* {{.*}} seq_cst, align {{4|8}}
   __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
-  // CHECK: atomicrmw min i64*
+  // CHECK: atomicrmw min i64* {{.*}} seq_cst, align 8
   __nvvm_atom_min_gen_ll(&sll, ll);
-  // CHECK: atomicrmw umin i64*
+  // CHECK: atomicrmw umin i64* {{.*}} seq_cst, align 8
   __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);
 
-  // CHECK: cmpxchg
+  // CHECK: cmpxchg i32* {{.*}} seq_cst seq_cst, align 4
   // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
   __nvvm_atom_cas_gen_i(ip, 0, i);
-  // CHECK: cmpxchg
+  // CHECK: cmpxchg i{{32|64}}* {{.*}} seq_cst seq_cst, align {{4|8}}
   // CHECK-NEXT: extractvalue { {{i32|i64}}, i1 } {{%[0-9]+}}, 0
   __nvvm_atom_cas_gen_l(&dl, 0, l);
-  // CHECK: cmpxchg
+  // CHECK: cmpxchg i64* {{.*}} seq_cst seq_cst, align 8
   // CHECK-NEXT: extractvalue { i64, i1 } {{%[0-9]+}}, 0
   __nvvm_atom_cas_gen_ll(&sll, 0, ll);
 
-  // CHECK: atomicrmw fadd float
+  // CHECK: atomicrmw fadd float* {{.*}} seq_cst, align 4
   __nvvm_atom_add_gen_f(fp, f);
 
   // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32

diff --git a/clang/test/CodeGen/c11atomics-ios.c b/clang/test/CodeGen/c11atomics-ios.c
index bb632a249b79..0aac32e4c615 100644
--- a/clang/test/CodeGen/c11atomics-ios.c
+++ b/clang/test/CodeGen/c11atomics-ios.c
@@ -265,7 +265,7 @@ PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
   // CHECK:   [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
   // CHECK:   [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
   // CHECK:   [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
-  // CHECK:   [[RES:%.*]] = atomicrmw xchg i64* [[ADDR64]], i64 [[VAL64]] seq_cst
+  // CHECK:   [[RES:%.*]] = atomicrmw xchg i64* [[ADDR64]], i64 [[VAL64]] seq_cst, align 8
   // CHECK:   store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
   // CHECK:   [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
   // CHECK:   [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
@@ -303,7 +303,7 @@ _Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
   // CHECK:   [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
   // CHECK:   [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, i64* [[ATOMIC_DESIRED64]], align 8
   // CHECK:   [[ATOMIC_NEW_VAL64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 8
-  // CHECK:   [[RES:%.*]] = cmpxchg i64* [[ADDR64]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst
+  // CHECK:   [[RES:%.*]] = cmpxchg i64* [[ADDR64]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst, align 8
   // CHECK:   [[RES_VAL64:%.*]] = extractvalue { i64, i1 } [[RES]], 0
   // CHECK:   [[RES_BOOL:%.*]] = extractvalue { i64, i1 } [[RES]], 1
   // CHECK:   br i1 [[RES_BOOL]], label {{%.*}}, label {{%.*}}

diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
index d9ec92a7e9b5..92fdad3134a6 100644
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@@ -45,13 +45,13 @@ _Atomic(vector) v;
 void testinc(void)
 {
   // Special case for suffix bool++, sets to true and returns the old value.
-  // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
+  // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst, align 1
   b++;
-  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst, align 4
   i++;
-  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst, align 8
   l++;
-  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst, align 2
   s++;
   // Prefix increment
   // Special case for bool: set to true and return true
@@ -60,13 +60,13 @@ void testinc(void)
   // Currently, we have no variant of atomicrmw that returns the new value, so
   // we have to generate an atomic add, which returns the old value, and then a
   // non-atomic add.
-  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
+  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst, align 4
   // CHECK: add i32
   ++i;
-  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
+  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst, align 8
   // CHECK: add i64
   ++l;
-  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
+  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst, align 2
   // CHECK: add i16
   ++s;
 }
@@ -75,21 +75,21 @@ void testdec(void)
 {
   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
   b--;
-  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst, align 4
   i--;
-  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst, align 8
   l--;
-  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst, align 2
   s--;
   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
   --b;
-  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
+  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst, align 4
   // CHECK: sub i32
   --i;
-  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
+  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst, align 8
   // CHECK: sub i64
   --l;
-  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
+  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst, align 2
   // CHECK: sub i16
   --s;
 }
@@ -97,9 +97,9 @@ void testdec(void)
 void testaddeq(void)
 {
   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
-  // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
-  // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
-  // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
+  // CHECK: atomicrmw add i32* @i, i32 42 seq_cst, align 4
+  // CHECK: atomicrmw add i64* @l, i64 42 seq_cst, align 8
+  // CHECK: atomicrmw add i16* @s, i16 42 seq_cst, align 2
   b += 42;
   i += 42;
   l += 42;
@@ -109,9 +109,9 @@ void testaddeq(void)
 void testsubeq(void)
 {
   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
-  // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
-  // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
-  // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
+  // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst, align 4
+  // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst, align 8
+  // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst, align 2
   b -= 42;
   i -= 42;
   l -= 42;
@@ -121,9 +121,9 @@ void testsubeq(void)
 void testxoreq(void)
 {
   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
-  // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
-  // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
-  // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
+  // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst, align 4
+  // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst, align 8
+  // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst, align 2
   b ^= 42;
   i ^= 42;
   l ^= 42;
@@ -133,9 +133,9 @@ void testxoreq(void)
 void testoreq(void)
 {
   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
-  // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
-  // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
-  // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
+  // CHECK: atomicrmw or i32* @i, i32 42 seq_cst, align 4
+  // CHECK: atomicrmw or i64* @l, i64 42 seq_cst, align 8
+  // CHECK: atomicrmw or i16* @s, i16 42 seq_cst, align 2
   b |= 42;
   i |= 42;
   l |= 42;
@@ -145,9 +145,9 @@ void testoreq(void)
 void testandeq(void)
 {
   // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 1, i8* @b
-  // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
-  // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
-  // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
+  // CHECK: atomicrmw and i32* @i, i32 42 seq_cst, align 4
+  // CHECK: atomicrmw and i64* @l, i64 42 seq_cst, align 8
+  // CHECK: atomicrmw and i16* @s, i16 42 seq_cst, align 2
   b &= 42;
   i &= 42;
   l &= 42;
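
As the comments in testinc above explain, atomicrmw hands back the value
from before the operation, so prefix increment needs a trailing non-atomic
add. A rough C equivalent (illustrative, not the emitted IR):

  #include <stdatomic.h>

  int preinc(_Atomic int *i) {
    int old = atomic_fetch_add(i, 1); // atomicrmw add ... seq_cst, align 4
    return old + 1;                   // plain 'add' reconstructs the new value
  }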

diff --git a/clang/test/CodeGen/code-coverage-tsan.c b/clang/test/CodeGen/code-coverage-tsan.c
index baaf1049dc7d..5a48232a4c0b 100644
--- a/clang/test/CodeGen/code-coverage-tsan.c
+++ b/clang/test/CodeGen/code-coverage-tsan.c
@@ -5,7 +5,7 @@
 
 // CHECK-LABEL: void @foo()
 /// Two counters are incremented by __tsan_atomic64_fetch_add.
-// CHECK:         atomicrmw add i64* {{.*}} @__llvm_gcov_ctr
+// CHECK:         atomicrmw add i64* {{.*}} @__llvm_gcov_ctr{{.*}} monotonic, align 8
 // CHECK-NEXT:    atomicrmw sub i32*
 
 _Atomic(int) cnt;

diff --git a/clang/test/CodeGen/linux-arm-atomic.c b/clang/test/CodeGen/linux-arm-atomic.c
index b8535f824827..8fcc6f7b567d 100644
--- a/clang/test/CodeGen/linux-arm-atomic.c
+++ b/clang/test/CodeGen/linux-arm-atomic.c
@@ -9,4 +9,4 @@ _Atomic_word exchange_and_add(volatile _Atomic_word *__mem, int __val) {
 }
 
 // CHECK: define {{.*}} @exchange_and_add
-// CHECK: atomicrmw {{.*}} add
+// CHECK: atomicrmw {{.*}} add i32* {{.*}} acq_rel, align 4

diff --git a/clang/test/CodeGen/ms-intrinsics-other.c b/clang/test/CodeGen/ms-intrinsics-other.c
index 33f811587303..a1f337b8c0fa 100644
--- a/clang/test/CodeGen/ms-intrinsics-other.c
+++ b/clang/test/CodeGen/ms-intrinsics-other.c
@@ -87,7 +87,7 @@ LONG test_InterlockedExchange(LONG volatile *value, LONG mask) {
   return _InterlockedExchange(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedExchange(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -95,7 +95,7 @@ LONG test_InterlockedExchangeAdd(LONG volatile *value, LONG mask) {
   return _InterlockedExchangeAdd(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedExchangeAdd(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -103,7 +103,7 @@ LONG test_InterlockedExchangeSub(LONG volatile *value, LONG mask) {
   return _InterlockedExchangeSub(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedExchangeSub(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -111,7 +111,7 @@ LONG test_InterlockedOr(LONG volatile *value, LONG mask) {
   return _InterlockedOr(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedOr(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -119,7 +119,7 @@ LONG test_InterlockedXor(LONG volatile *value, LONG mask) {
   return _InterlockedXor(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedXor(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -127,7 +127,7 @@ LONG test_InterlockedAnd(LONG volatile *value, LONG mask) {
   return _InterlockedAnd(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedAnd(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -135,7 +135,7 @@ LONG test_InterlockedCompareExchange(LONG volatile *Destination, LONG Exchange,
   return _InterlockedCompareExchange(Destination, Exchange, Comperand);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedCompareExchange(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange seq_cst seq_cst
+// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange seq_cst seq_cst, align 4
 // CHECK: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
@@ -144,7 +144,7 @@ LONG test_InterlockedIncrement(LONG volatile *Addend) {
   return _InterlockedIncrement(Addend);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedIncrement(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 seq_cst, align 4
 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
@@ -153,7 +153,7 @@ LONG test_InterlockedDecrement(LONG volatile *Addend) {
   return _InterlockedDecrement(Addend);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedDecrement(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 seq_cst, align 4
 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
@@ -212,7 +212,7 @@ LONG test_InterlockedAdd(LONG volatile *Addend, LONG Value) {
 }
 
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedAdd(i32*{{[a-z_ ]*}}%Addend, i32 %Value) {{.*}} {
-// CHECK-ARM-ARM64: %[[OLDVAL:[0-9]+]] = atomicrmw add i32* %Addend, i32 %Value seq_cst
+// CHECK-ARM-ARM64: %[[OLDVAL:[0-9]+]] = atomicrmw add i32* %Addend, i32 %Value seq_cst, align 4
 // CHECK-ARM-ARM64: %[[NEWVAL:[0-9]+]] = add i32 %[[OLDVAL:[0-9]+]], %Value
 // CHECK-ARM-ARM64: ret i32 %[[NEWVAL:[0-9]+]]
 #endif
@@ -222,21 +222,21 @@ LONG test_InterlockedExchangeAdd_acq(LONG volatile *value, LONG mask) {
   return _InterlockedExchangeAdd_acq(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedExchangeAdd_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask acquire
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 LONG test_InterlockedExchangeAdd_rel(LONG volatile *value, LONG mask) {
   return _InterlockedExchangeAdd_rel(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedExchangeAdd_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask release
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask release, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 LONG test_InterlockedExchangeAdd_nf(LONG volatile *value, LONG mask) {
   return _InterlockedExchangeAdd_nf(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedExchangeAdd_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask monotonic
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -244,21 +244,21 @@ LONG test_InterlockedExchange_acq(LONG volatile *value, LONG mask) {
   return _InterlockedExchange_acq(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedExchange_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask acquire
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 LONG test_InterlockedExchange_rel(LONG volatile *value, LONG mask) {
   return _InterlockedExchange_rel(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedExchange_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask release
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask release, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 LONG test_InterlockedExchange_nf(LONG volatile *value, LONG mask) {
   return _InterlockedExchange_nf(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedExchange_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask monotonic
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -266,7 +266,7 @@ LONG test_InterlockedCompareExchange_acq(LONG volatile *Destination, LONG Exchan
   return _InterlockedCompareExchange_acq(Destination, Exchange, Comperand);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedCompareExchange_acq(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange acquire acquire
+// CHECK-ARM: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange acquire acquire, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -275,7 +275,7 @@ LONG test_InterlockedCompareExchange_rel(LONG volatile *Destination, LONG Exchan
   return _InterlockedCompareExchange_rel(Destination, Exchange, Comperand);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedCompareExchange_rel(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange release monotonic
+// CHECK-ARM: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange release monotonic, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -284,7 +284,7 @@ LONG test_InterlockedCompareExchange_nf(LONG volatile *Destination, LONG Exchang
   return _InterlockedCompareExchange_nf(Destination, Exchange, Comperand);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedCompareExchange_nf(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange monotonic monotonic
+// CHECK-ARM: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange monotonic monotonic, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -293,7 +293,7 @@ LONG test_InterlockedOr_acq(LONG volatile *value, LONG mask) {
   return _InterlockedOr_acq(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedOr_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask acquire
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -301,7 +301,7 @@ LONG test_InterlockedOr_rel(LONG volatile *value, LONG mask) {
   return _InterlockedOr_rel(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedOr_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask release
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask release, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -309,7 +309,7 @@ LONG test_InterlockedOr_nf(LONG volatile *value, LONG mask) {
   return _InterlockedOr_nf(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedOr_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask monotonic
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -317,7 +317,7 @@ LONG test_InterlockedXor_acq(LONG volatile *value, LONG mask) {
   return _InterlockedXor_acq(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedXor_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask acquire
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -325,7 +325,7 @@ LONG test_InterlockedXor_rel(LONG volatile *value, LONG mask) {
   return _InterlockedXor_rel(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedXor_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask release
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask release, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -333,7 +333,7 @@ LONG test_InterlockedXor_nf(LONG volatile *value, LONG mask) {
   return _InterlockedXor_nf(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedXor_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask monotonic
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -341,7 +341,7 @@ LONG test_InterlockedAnd_acq(LONG volatile *value, LONG mask) {
   return _InterlockedAnd_acq(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedAnd_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask acquire
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -349,7 +349,7 @@ LONG test_InterlockedAnd_rel(LONG volatile *value, LONG mask) {
   return _InterlockedAnd_rel(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedAnd_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask release
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask release, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -357,7 +357,7 @@ LONG test_InterlockedAnd_nf(LONG volatile *value, LONG mask) {
   return _InterlockedAnd_nf(value, mask);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedAnd_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask monotonic
+// CHECK-ARM:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM: }
 
@@ -366,7 +366,7 @@ LONG test_InterlockedIncrement_acq(LONG volatile *Addend) {
   return _InterlockedIncrement_acq(Addend);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedIncrement_acq(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 acquire
+// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 acquire, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -375,7 +375,7 @@ LONG test_InterlockedIncrement_rel(LONG volatile *Addend) {
   return _InterlockedIncrement_rel(Addend);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedIncrement_rel(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 release
+// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 release, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -384,7 +384,7 @@ LONG test_InterlockedIncrement_nf(LONG volatile *Addend) {
   return _InterlockedIncrement_nf(Addend);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedIncrement_nf(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 monotonic
+// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 monotonic, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -393,7 +393,7 @@ LONG test_InterlockedDecrement_acq(LONG volatile *Addend) {
   return _InterlockedDecrement_acq(Addend);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedDecrement_acq(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 acquire
+// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 acquire, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -402,7 +402,7 @@ LONG test_InterlockedDecrement_rel(LONG volatile *Addend) {
   return _InterlockedDecrement_rel(Addend);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedDecrement_rel(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 release
+// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 release, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }
@@ -411,7 +411,7 @@ LONG test_InterlockedDecrement_nf(LONG volatile *Addend) {
   return _InterlockedDecrement_nf(Addend);
 }
 // CHECK-ARM: define{{.*}}i32 @test_InterlockedDecrement_nf(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 monotonic
+// CHECK-ARM: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 monotonic, align 4
 // CHECK-ARM: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK-ARM: ret i32 [[RESULT]]
 // CHECK-ARM: }

diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c
index 90a97b5cb046..9f608158b167 100644
--- a/clang/test/CodeGen/ms-intrinsics.c
+++ b/clang/test/CodeGen/ms-intrinsics.c
@@ -217,7 +217,7 @@ void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) {
 // CHECK: define{{.*}}i8* @test_InterlockedExchangePointer(i8** {{[a-z_ ]*}}%Target, i8* {{[a-z_ ]*}}%Value){{.*}}{
 // CHECK:   %[[TARGET:[0-9]+]] = bitcast i8** %Target to [[iPTR:i[0-9]+]]*
 // CHECK:   %[[VALUE:[0-9]+]] = ptrtoint i8* %Value to [[iPTR]]
-// CHECK:   %[[EXCHANGE:[0-9]+]] = atomicrmw xchg [[iPTR]]* %[[TARGET]], [[iPTR]] %[[VALUE]] seq_cst
+// CHECK:   %[[EXCHANGE:[0-9]+]] = atomicrmw xchg [[iPTR]]* %[[TARGET]], [[iPTR]] %[[VALUE]] seq_cst, align {{4|8}}
 // CHECK:   %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXCHANGE]] to i8*
 // CHECK:   ret i8* %[[RESULT]]
 // CHECK: }
@@ -231,7 +231,7 @@ void *test_InterlockedCompareExchangePointer(void * volatile *Destination,
 // CHECK:   %[[DEST:[0-9]+]] = bitcast i8** %Destination to [[iPTR]]*
 // CHECK:   %[[EXCHANGE:[0-9]+]] = ptrtoint i8* %Exchange to [[iPTR]]
 // CHECK:   %[[COMPARAND:[0-9]+]] = ptrtoint i8* %Comparand to [[iPTR]]
-// CHECK:   %[[XCHG:[0-9]+]] = cmpxchg volatile [[iPTR]]* %[[DEST:[0-9]+]], [[iPTR]] %[[COMPARAND:[0-9]+]], [[iPTR]] %[[EXCHANGE:[0-9]+]] seq_cst seq_cst
+// CHECK:   %[[XCHG:[0-9]+]] = cmpxchg volatile [[iPTR]]* %[[DEST:[0-9]+]], [[iPTR]] %[[COMPARAND:[0-9]+]], [[iPTR]] %[[EXCHANGE:[0-9]+]] seq_cst seq_cst, align {{4|8}}
 // CHECK:   %[[EXTRACT:[0-9]+]] = extractvalue { [[iPTR]], i1 } %[[XCHG]], 0
 // CHECK:   %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXTRACT]] to i8*
 // CHECK:   ret i8* %[[RESULT:[0-9]+]]
@@ -246,7 +246,7 @@ void *test_InterlockedCompareExchangePointer_nf(void * volatile *Destination,
 // CHECK:   %[[DEST:[0-9]+]] = bitcast i8** %Destination to [[iPTR]]*
 // CHECK:   %[[EXCHANGE:[0-9]+]] = ptrtoint i8* %Exchange to [[iPTR]]
 // CHECK:   %[[COMPARAND:[0-9]+]] = ptrtoint i8* %Comparand to [[iPTR]]
-// CHECK:   %[[XCHG:[0-9]+]] = cmpxchg volatile [[iPTR]]* %[[DEST:[0-9]+]], [[iPTR]] %[[COMPARAND:[0-9]+]], [[iPTR]] %[[EXCHANGE:[0-9]+]] monotonic monotonic
+// CHECK:   %[[XCHG:[0-9]+]] = cmpxchg volatile [[iPTR]]* %[[DEST:[0-9]+]], [[iPTR]] %[[COMPARAND:[0-9]+]], [[iPTR]] %[[EXCHANGE:[0-9]+]] monotonic monotonic, align {{4|8}}
 // CHECK:   %[[EXTRACT:[0-9]+]] = extractvalue { [[iPTR]], i1 } %[[XCHG]], 0
 // CHECK:   %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXTRACT]] to i8*
 // CHECK:   ret i8* %[[RESULT:[0-9]+]]
@@ -256,7 +256,7 @@ char test_InterlockedExchange8(char volatile *value, char mask) {
   return _InterlockedExchange8(value, mask);
 }
 // CHECK: define{{.*}}i8 @test_InterlockedExchange8(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask seq_cst, align 1
 // CHECK:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -264,7 +264,7 @@ short test_InterlockedExchange16(short volatile *value, short mask) {
   return _InterlockedExchange16(value, mask);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedExchange16(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask seq_cst, align 2
 // CHECK:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -272,7 +272,7 @@ long test_InterlockedExchange(long volatile *value, long mask) {
   return _InterlockedExchange(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedExchange(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -280,7 +280,7 @@ char test_InterlockedExchangeAdd8(char volatile *value, char mask) {
   return _InterlockedExchangeAdd8(value, mask);
 }
 // CHECK: define{{.*}}i8 @test_InterlockedExchangeAdd8(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask seq_cst, align 1
 // CHECK:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -288,7 +288,7 @@ short test_InterlockedExchangeAdd16(short volatile *value, short mask) {
   return _InterlockedExchangeAdd16(value, mask);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedExchangeAdd16(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask seq_cst, align 2
 // CHECK:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -296,7 +296,7 @@ long test_InterlockedExchangeAdd(long volatile *value, long mask) {
   return _InterlockedExchangeAdd(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedExchangeAdd(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -304,7 +304,7 @@ char test_InterlockedExchangeSub8(char volatile *value, char mask) {
   return _InterlockedExchangeSub8(value, mask);
 }
 // CHECK: define{{.*}}i8 @test_InterlockedExchangeSub8(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i8* %value, i8 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i8* %value, i8 %mask seq_cst, align 1
 // CHECK:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -312,7 +312,7 @@ short test_InterlockedExchangeSub16(short volatile *value, short mask) {
   return _InterlockedExchangeSub16(value, mask);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedExchangeSub16(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i16* %value, i16 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i16* %value, i16 %mask seq_cst, align 2
 // CHECK:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -320,7 +320,7 @@ long test_InterlockedExchangeSub(long volatile *value, long mask) {
   return _InterlockedExchangeSub(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedExchangeSub(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -328,7 +328,7 @@ char test_InterlockedOr8(char volatile *value, char mask) {
   return _InterlockedOr8(value, mask);
 }
 // CHECK: define{{.*}}i8 @test_InterlockedOr8(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask seq_cst, align 1
 // CHECK:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -336,7 +336,7 @@ short test_InterlockedOr16(short volatile *value, short mask) {
   return _InterlockedOr16(value, mask);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedOr16(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask seq_cst, align 2
 // CHECK:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -344,7 +344,7 @@ long test_InterlockedOr(long volatile *value, long mask) {
   return _InterlockedOr(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedOr(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -352,7 +352,7 @@ char test_InterlockedXor8(char volatile *value, char mask) {
   return _InterlockedXor8(value, mask);
 }
 // CHECK: define{{.*}}i8 @test_InterlockedXor8(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask seq_cst, align 1
 // CHECK:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -360,7 +360,7 @@ short test_InterlockedXor16(short volatile *value, short mask) {
   return _InterlockedXor16(value, mask);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedXor16(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask seq_cst, align 2
 // CHECK:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -368,7 +368,7 @@ long test_InterlockedXor(long volatile *value, long mask) {
   return _InterlockedXor(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedXor(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -376,7 +376,7 @@ char test_InterlockedAnd8(char volatile *value, char mask) {
   return _InterlockedAnd8(value, mask);
 }
 // CHECK: define{{.*}}i8 @test_InterlockedAnd8(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask seq_cst, align 1
 // CHECK:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -384,7 +384,7 @@ short test_InterlockedAnd16(short volatile *value, short mask) {
   return _InterlockedAnd16(value, mask);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedAnd16(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask seq_cst, align 2
 // CHECK:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -392,7 +392,7 @@ long test_InterlockedAnd(long volatile *value, long mask) {
   return _InterlockedAnd(value, mask);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedAnd(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask seq_cst, align 4
 // CHECK:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -400,7 +400,7 @@ char test_InterlockedCompareExchange8(char volatile *Destination, char Exchange,
   return _InterlockedCompareExchange8(Destination, Exchange, Comperand);
 }
 // CHECK: define{{.*}}i8 @test_InterlockedCompareExchange8(i8*{{[a-z_ ]*}}%Destination, i8{{[a-z_ ]*}}%Exchange, i8{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange seq_cst seq_cst
+// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange seq_cst seq_cst, align 1
 // CHECK: [[RESULT:%[0-9]+]] = extractvalue { i8, i1 } [[TMP]], 0
 // CHECK: ret i8 [[RESULT]]
 // CHECK: }
@@ -409,7 +409,7 @@ short test_InterlockedCompareExchange16(short volatile *Destination, short Excha
   return _InterlockedCompareExchange16(Destination, Exchange, Comperand);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedCompareExchange16(i16*{{[a-z_ ]*}}%Destination, i16{{[a-z_ ]*}}%Exchange, i16{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange seq_cst seq_cst
+// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange seq_cst seq_cst, align 2
 // CHECK: [[RESULT:%[0-9]+]] = extractvalue { i16, i1 } [[TMP]], 0
 // CHECK: ret i16 [[RESULT]]
 // CHECK: }
@@ -418,7 +418,7 @@ long test_InterlockedCompareExchange(long volatile *Destination, long Exchange,
   return _InterlockedCompareExchange(Destination, Exchange, Comperand);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedCompareExchange(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange seq_cst seq_cst
+// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange seq_cst seq_cst, align 4
 // CHECK: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
@@ -427,7 +427,7 @@ __int64 test_InterlockedCompareExchange64(__int64 volatile *Destination, __int64
   return _InterlockedCompareExchange64(Destination, Exchange, Comperand);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedCompareExchange64(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%Exchange, i64{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange seq_cst seq_cst
+// CHECK: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange seq_cst seq_cst, align 8
 // CHECK: [[RESULT:%[0-9]+]] = extractvalue { i64, i1 } [[TMP]], 0
 // CHECK: ret i64 [[RESULT]]
 // CHECK: }
@@ -451,7 +451,7 @@ unsigned char test_InterlockedCompareExchange128(
 // CHECK-64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
 // CHECK-64: [[EXP:%[0-9]+]] = or i128 [[EHS]], [[EL]]
 // CHECK-64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 16
-// CHECK-64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst
+// CHECK-64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst, align 16
 // CHECK-64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK-64: store i128 [[OLD]], i128* [[CNR]], align 16
 // CHECK-64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
@@ -480,11 +480,11 @@ unsigned char test_InterlockedCompareExchange128_rel(
                                             ExchangeLow, ComparandResult);
 }
 // CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_acq({{.*}})
-// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} acquire acquire
+// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} acquire acquire, align 16
 // CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_nf({{.*}})
-// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} monotonic monotonic
+// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} monotonic monotonic, align 16
 // CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_rel({{.*}})
-// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} release monotonic
+// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} release monotonic, align 16
 #endif
 
 short test_InterlockedIncrement16(short volatile *Addend) {
@@ -492,7 +492,7 @@ short test_InterlockedIncrement16(short volatile *Addend) {
 }
 // CHECK: define{{.*}}i16 @test_InterlockedIncrement16(i16*{{[a-z_ ]*}}%Addend){{.*}}{
 // CHECK: %incdec.ptr = getelementptr inbounds i16, i16* %Addend, {{i64|i32}} 1
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i16* %incdec.ptr, i16 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i16* %incdec.ptr, i16 1 seq_cst, align 2
 // CHECK: [[RESULT:%[0-9]+]] = add i16 [[TMP]], 1
 // CHECK: ret i16 [[RESULT]]
 // CHECK: }
@@ -502,7 +502,7 @@ long test_InterlockedIncrement(long volatile *Addend) {
 }
 // CHECK: define{{.*}}i32 @test_InterlockedIncrement(i32*{{[a-z_ ]*}}%Addend){{.*}}{
 // CHECK: %incdec.ptr = getelementptr inbounds i32, i32* %Addend, {{i64|i32}} 1
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i32* %incdec.ptr, i32 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i32* %incdec.ptr, i32 1 seq_cst, align 4
 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
@@ -511,7 +511,7 @@ short test_InterlockedDecrement16(short volatile *Addend) {
   return _InterlockedDecrement16(Addend);
 }
 // CHECK: define{{.*}}i16 @test_InterlockedDecrement16(i16*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 seq_cst, align 2
 // CHECK: [[RESULT:%[0-9]+]] = add i16 [[TMP]], -1
 // CHECK: ret i16 [[RESULT]]
 // CHECK: }
@@ -520,7 +520,7 @@ long test_InterlockedDecrement(long volatile *Addend) {
   return _InterlockedDecrement(Addend);
 }
 // CHECK: define{{.*}}i32 @test_InterlockedDecrement(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 seq_cst, align 4
 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
@@ -559,7 +559,7 @@ __int64 test_InterlockedExchange64(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchange64(value, mask);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedExchange64(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask seq_cst, align 8
 // CHECK:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -567,7 +567,7 @@ __int64 test_InterlockedExchangeAdd64(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchangeAdd64(value, mask);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedExchangeAdd64(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask seq_cst, align 8
 // CHECK:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -575,7 +575,7 @@ __int64 test_InterlockedExchangeSub64(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchangeSub64(value, mask);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedExchangeSub64(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i64* %value, i64 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw sub i64* %value, i64 %mask seq_cst, align 8
 // CHECK:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -583,7 +583,7 @@ __int64 test_InterlockedOr64(__int64 volatile *value, __int64 mask) {
   return _InterlockedOr64(value, mask);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedOr64(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask seq_cst, align 8
 // CHECK:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -591,7 +591,7 @@ __int64 test_InterlockedXor64(__int64 volatile *value, __int64 mask) {
   return _InterlockedXor64(value, mask);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedXor64(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask seq_cst, align 8
 // CHECK:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -599,7 +599,7 @@ __int64 test_InterlockedAnd64(__int64 volatile *value, __int64 mask) {
   return _InterlockedAnd64(value, mask);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedAnd64(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask seq_cst
+// CHECK:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask seq_cst, align 8
 // CHECK:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK: }
 
@@ -607,7 +607,7 @@ __int64 test_InterlockedIncrement64(__int64 volatile *Addend) {
   return _InterlockedIncrement64(Addend);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedIncrement64(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 seq_cst, align 8
 // CHECK: [[RESULT:%[0-9]+]] = add i64 [[TMP]], 1
 // CHECK: ret i64 [[RESULT]]
 // CHECK: }
@@ -616,7 +616,7 @@ __int64 test_InterlockedDecrement64(__int64 volatile *Addend) {
   return _InterlockedDecrement64(Addend);
 }
 // CHECK: define{{.*}}i64 @test_InterlockedDecrement64(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 seq_cst
+// CHECK: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 seq_cst, align 8
 // CHECK: [[RESULT:%[0-9]+]] = add i64 [[TMP]], -1
 // CHECK: ret i64 [[RESULT]]
 // CHECK: }
@@ -677,84 +677,84 @@ char test_InterlockedExchangeAdd8_acq(char volatile *value, char mask) {
   return _InterlockedExchangeAdd8_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedExchangeAdd8_acq(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask acquire, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 char test_InterlockedExchangeAdd8_rel(char volatile *value, char mask) {
   return _InterlockedExchangeAdd8_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedExchangeAdd8_rel(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask release, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 char test_InterlockedExchangeAdd8_nf(char volatile *value, char mask) {
   return _InterlockedExchangeAdd8_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedExchangeAdd8_nf(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i8* %value, i8 %mask monotonic, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 short test_InterlockedExchangeAdd16_acq(short volatile *value, short mask) {
   return _InterlockedExchangeAdd16_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedExchangeAdd16_acq(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask acquire, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 short test_InterlockedExchangeAdd16_rel(short volatile *value, short mask) {
   return _InterlockedExchangeAdd16_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedExchangeAdd16_rel(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask release, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 short test_InterlockedExchangeAdd16_nf(short volatile *value, short mask) {
   return _InterlockedExchangeAdd16_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedExchangeAdd16_nf(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i16* %value, i16 %mask monotonic, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 long test_InterlockedExchangeAdd_acq(long volatile *value, long mask) {
   return _InterlockedExchangeAdd_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedExchangeAdd_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 long test_InterlockedExchangeAdd_rel(long volatile *value, long mask) {
   return _InterlockedExchangeAdd_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedExchangeAdd_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask release, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 long test_InterlockedExchangeAdd_nf(long volatile *value, long mask) {
   return _InterlockedExchangeAdd_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedExchangeAdd_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 __int64 test_InterlockedExchangeAdd64_acq(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchangeAdd64_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedExchangeAdd64_acq(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask acquire, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 __int64 test_InterlockedExchangeAdd64_rel(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchangeAdd64_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedExchangeAdd64_rel(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask release, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 __int64 test_InterlockedExchangeAdd64_nf(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchangeAdd64_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedExchangeAdd64_nf(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw add i64* %value, i64 %mask monotonic, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -762,84 +762,84 @@ char test_InterlockedExchange8_acq(char volatile *value, char mask) {
   return _InterlockedExchange8_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedExchange8_acq(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask acquire, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 char test_InterlockedExchange8_rel(char volatile *value, char mask) {
   return _InterlockedExchange8_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedExchange8_rel(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask release, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 char test_InterlockedExchange8_nf(char volatile *value, char mask) {
   return _InterlockedExchange8_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedExchange8_nf(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i8* %value, i8 %mask monotonic, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 short test_InterlockedExchange16_acq(short volatile *value, short mask) {
   return _InterlockedExchange16_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedExchange16_acq(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask acquire, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 short test_InterlockedExchange16_rel(short volatile *value, short mask) {
   return _InterlockedExchange16_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedExchange16_rel(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask release, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 short test_InterlockedExchange16_nf(short volatile *value, short mask) {
   return _InterlockedExchange16_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedExchange16_nf(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i16* %value, i16 %mask monotonic, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 long test_InterlockedExchange_acq(long volatile *value, long mask) {
   return _InterlockedExchange_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedExchange_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 long test_InterlockedExchange_rel(long volatile *value, long mask) {
   return _InterlockedExchange_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedExchange_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask release, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 long test_InterlockedExchange_nf(long volatile *value, long mask) {
   return _InterlockedExchange_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedExchange_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 __int64 test_InterlockedExchange64_acq(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchange64_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedExchange64_acq(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask acquire, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 __int64 test_InterlockedExchange64_rel(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchange64_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedExchange64_rel(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask release, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 __int64 test_InterlockedExchange64_nf(__int64 volatile *value, __int64 mask) {
   return _InterlockedExchange64_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedExchange64_nf(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xchg i64* %value, i64 %mask monotonic, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -847,7 +847,7 @@ char test_InterlockedCompareExchange8_acq(char volatile *Destination, char Excha
   return _InterlockedCompareExchange8_acq(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange8_acq(i8*{{[a-z_ ]*}}%Destination, i8{{[a-z_ ]*}}%Exchange, i8{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange acquire acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange acquire acquire, align 1
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i8, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i8 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -856,7 +856,7 @@ char test_InterlockedCompareExchange8_rel(char volatile *Destination, char Excha
   return _InterlockedCompareExchange8_rel(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange8_rel(i8*{{[a-z_ ]*}}%Destination, i8{{[a-z_ ]*}}%Exchange, i8{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange release monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange release monotonic, align 1
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i8, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i8 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -865,7 +865,7 @@ char test_InterlockedCompareExchange8_nf(char volatile *Destination, char Exchan
   return _InterlockedCompareExchange8_nf(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange8_nf(i8*{{[a-z_ ]*}}%Destination, i8{{[a-z_ ]*}}%Exchange, i8{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange monotonic monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i8* %Destination, i8 %Comperand, i8 %Exchange monotonic monotonic, align 1
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i8, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i8 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -874,7 +874,7 @@ short test_InterlockedCompareExchange16_acq(short volatile *Destination, short E
   return _InterlockedCompareExchange16_acq(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedCompareExchange16_acq(i16*{{[a-z_ ]*}}%Destination, i16{{[a-z_ ]*}}%Exchange, i16{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange acquire acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange acquire acquire, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i16, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -883,7 +883,7 @@ short test_InterlockedCompareExchange16_rel(short volatile *Destination, short E
   return _InterlockedCompareExchange16_rel(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedCompareExchange16_rel(i16*{{[a-z_ ]*}}%Destination, i16{{[a-z_ ]*}}%Exchange, i16{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange release monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange release monotonic, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i16, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -892,7 +892,7 @@ short test_InterlockedCompareExchange16_nf(short volatile *Destination, short Ex
   return _InterlockedCompareExchange16_nf(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedCompareExchange16_nf(i16*{{[a-z_ ]*}}%Destination, i16{{[a-z_ ]*}}%Exchange, i16{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange monotonic monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i16* %Destination, i16 %Comperand, i16 %Exchange monotonic monotonic, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i16, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -901,7 +901,7 @@ long test_InterlockedCompareExchange_acq(long volatile *Destination, long Exchan
   return _InterlockedCompareExchange_acq(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedCompareExchange_acq(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange acquire acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange acquire acquire, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -910,7 +910,7 @@ long test_InterlockedCompareExchange_rel(long volatile *Destination, long Exchan
   return _InterlockedCompareExchange_rel(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedCompareExchange_rel(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange release monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange release monotonic, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -919,7 +919,7 @@ long test_InterlockedCompareExchange_nf(long volatile *Destination, long Exchang
   return _InterlockedCompareExchange_nf(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedCompareExchange_nf(i32*{{[a-z_ ]*}}%Destination, i32{{[a-z_ ]*}}%Exchange, i32{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange monotonic monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i32* %Destination, i32 %Comperand, i32 %Exchange monotonic monotonic, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i32, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -928,7 +928,7 @@ __int64 test_InterlockedCompareExchange64_acq(__int64 volatile *Destination, __i
   return _InterlockedCompareExchange64_acq(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedCompareExchange64_acq(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%Exchange, i64{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange acquire acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange acquire acquire, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i64, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -937,7 +937,7 @@ __int64 test_InterlockedCompareExchange64_rel(__int64 volatile *Destination, __i
   return _InterlockedCompareExchange64_rel(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedCompareExchange64_rel(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%Exchange, i64{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange release monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange release monotonic, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i64, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -946,7 +946,7 @@ __int64 test_InterlockedCompareExchange64_nf(__int64 volatile *Destination, __in
   return _InterlockedCompareExchange64_nf(Destination, Exchange, Comperand);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedCompareExchange64_nf(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%Exchange, i64{{[a-z_ ]*}}%Comperand){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange monotonic monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = cmpxchg volatile i64* %Destination, i64 %Comperand, i64 %Exchange monotonic monotonic, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = extractvalue { i64, i1 } [[TMP]], 0
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -955,7 +955,7 @@ char test_InterlockedOr8_acq(char volatile *value, char mask) {
   return _InterlockedOr8_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedOr8_acq(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask acquire, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -963,7 +963,7 @@ char test_InterlockedOr8_rel(char volatile *value, char mask) {
   return _InterlockedOr8_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedOr8_rel(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask release, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -971,7 +971,7 @@ char test_InterlockedOr8_nf(char volatile *value, char mask) {
   return _InterlockedOr8_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedOr8_nf(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i8* %value, i8 %mask monotonic, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -979,7 +979,7 @@ short test_InterlockedOr16_acq(short volatile *value, short mask) {
   return _InterlockedOr16_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedOr16_acq(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask acquire, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -987,7 +987,7 @@ short test_InterlockedOr16_rel(short volatile *value, short mask) {
   return _InterlockedOr16_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedOr16_rel(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask release, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -995,7 +995,7 @@ short test_InterlockedOr16_nf(short volatile *value, short mask) {
   return _InterlockedOr16_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedOr16_nf(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i16* %value, i16 %mask monotonic, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1003,7 +1003,7 @@ long test_InterlockedOr_acq(long volatile *value, long mask) {
   return _InterlockedOr_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedOr_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1011,7 +1011,7 @@ long test_InterlockedOr_rel(long volatile *value, long mask) {
   return _InterlockedOr_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedOr_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask release, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1019,7 +1019,7 @@ long test_InterlockedOr_nf(long volatile *value, long mask) {
   return _InterlockedOr_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedOr_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1027,7 +1027,7 @@ __int64 test_InterlockedOr64_acq(__int64 volatile *value, __int64 mask) {
   return _InterlockedOr64_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedOr64_acq(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask acquire, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1035,7 +1035,7 @@ __int64 test_InterlockedOr64_rel(__int64 volatile *value, __int64 mask) {
   return _InterlockedOr64_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedOr64_rel(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask release, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1043,7 +1043,7 @@ __int64 test_InterlockedOr64_nf(__int64 volatile *value, __int64 mask) {
   return _InterlockedOr64_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedOr64_nf(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw or i64* %value, i64 %mask monotonic, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1051,7 +1051,7 @@ char test_InterlockedXor8_acq(char volatile *value, char mask) {
   return _InterlockedXor8_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedXor8_acq(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask acquire, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1059,7 +1059,7 @@ char test_InterlockedXor8_rel(char volatile *value, char mask) {
   return _InterlockedXor8_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedXor8_rel(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask release, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1067,7 +1067,7 @@ char test_InterlockedXor8_nf(char volatile *value, char mask) {
   return _InterlockedXor8_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedXor8_nf(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i8* %value, i8 %mask monotonic, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1075,7 +1075,7 @@ short test_InterlockedXor16_acq(short volatile *value, short mask) {
   return _InterlockedXor16_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedXor16_acq(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask acquire, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1083,7 +1083,7 @@ short test_InterlockedXor16_rel(short volatile *value, short mask) {
   return _InterlockedXor16_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedXor16_rel(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask release, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1091,7 +1091,7 @@ short test_InterlockedXor16_nf(short volatile *value, short mask) {
   return _InterlockedXor16_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedXor16_nf(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i16* %value, i16 %mask monotonic, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1099,7 +1099,7 @@ long test_InterlockedXor_acq(long volatile *value, long mask) {
   return _InterlockedXor_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedXor_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1107,7 +1107,7 @@ long test_InterlockedXor_rel(long volatile *value, long mask) {
   return _InterlockedXor_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedXor_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask release, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1115,7 +1115,7 @@ long test_InterlockedXor_nf(long volatile *value, long mask) {
   return _InterlockedXor_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedXor_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1123,7 +1123,7 @@ __int64 test_InterlockedXor64_acq(__int64 volatile *value, __int64 mask) {
   return _InterlockedXor64_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedXor64_acq(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask acquire, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1131,7 +1131,7 @@ __int64 test_InterlockedXor64_rel(__int64 volatile *value, __int64 mask) {
   return _InterlockedXor64_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedXor64_rel(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask release, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1139,7 +1139,7 @@ __int64 test_InterlockedXor64_nf(__int64 volatile *value, __int64 mask) {
   return _InterlockedXor64_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedXor64_nf(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw xor i64* %value, i64 %mask monotonic, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1147,7 +1147,7 @@ char test_InterlockedAnd8_acq(char volatile *value, char mask) {
   return _InterlockedAnd8_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedAnd8_acq(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask acquire, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1155,7 +1155,7 @@ char test_InterlockedAnd8_rel(char volatile *value, char mask) {
   return _InterlockedAnd8_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedAnd8_rel(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask release, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1163,7 +1163,7 @@ char test_InterlockedAnd8_nf(char volatile *value, char mask) {
   return _InterlockedAnd8_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i8 @test_InterlockedAnd8_nf(i8*{{[a-z_ ]*}}%value, i8{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i8* %value, i8 %mask monotonic, align 1
 // CHECK-ARM-ARM64:   ret i8 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1171,7 +1171,7 @@ short test_InterlockedAnd16_acq(short volatile *value, short mask) {
   return _InterlockedAnd16_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedAnd16_acq(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask acquire, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1179,7 +1179,7 @@ short test_InterlockedAnd16_rel(short volatile *value, short mask) {
   return _InterlockedAnd16_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedAnd16_rel(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask release, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1187,7 +1187,7 @@ short test_InterlockedAnd16_nf(short volatile *value, short mask) {
   return _InterlockedAnd16_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedAnd16_nf(i16*{{[a-z_ ]*}}%value, i16{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i16* %value, i16 %mask monotonic, align 2
 // CHECK-ARM-ARM64:   ret i16 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1195,7 +1195,7 @@ long test_InterlockedAnd_acq(long volatile *value, long mask) {
   return _InterlockedAnd_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedAnd_acq(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask acquire, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1203,7 +1203,7 @@ long test_InterlockedAnd_rel(long volatile *value, long mask) {
   return _InterlockedAnd_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedAnd_rel(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask release, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1211,7 +1211,7 @@ long test_InterlockedAnd_nf(long volatile *value, long mask) {
   return _InterlockedAnd_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedAnd_nf(i32*{{[a-z_ ]*}}%value, i32{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i32* %value, i32 %mask monotonic, align 4
 // CHECK-ARM-ARM64:   ret i32 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1219,7 +1219,7 @@ __int64 test_InterlockedAnd64_acq(__int64 volatile *value, __int64 mask) {
   return _InterlockedAnd64_acq(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedAnd64_acq(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask acquire
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask acquire, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1227,7 +1227,7 @@ __int64 test_InterlockedAnd64_rel(__int64 volatile *value, __int64 mask) {
   return _InterlockedAnd64_rel(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedAnd64_rel(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask release
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask release, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1235,7 +1235,7 @@ __int64 test_InterlockedAnd64_nf(__int64 volatile *value, __int64 mask) {
   return _InterlockedAnd64_nf(value, mask);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedAnd64_nf(i64*{{[a-z_ ]*}}%value, i64{{[a-z_ ]*}}%mask){{.*}}{
-// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask monotonic
+// CHECK-ARM-ARM64:   [[RESULT:%[0-9]+]] = atomicrmw and i64* %value, i64 %mask monotonic, align 8
 // CHECK-ARM-ARM64:   ret i64 [[RESULT:%[0-9]+]]
 // CHECK-ARM-ARM64: }
 
@@ -1243,7 +1243,7 @@ short test_InterlockedIncrement16_acq(short volatile *Addend) {
   return _InterlockedIncrement16_acq(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedIncrement16_acq(i16*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i16* %Addend, i16 1 acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i16* %Addend, i16 1 acquire, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i16 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1252,7 +1252,7 @@ short test_InterlockedIncrement16_rel(short volatile *Addend) {
   return _InterlockedIncrement16_rel(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedIncrement16_rel(i16*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i16* %Addend, i16 1 release
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i16* %Addend, i16 1 release, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i16 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1261,7 +1261,7 @@ short test_InterlockedIncrement16_nf(short volatile *Addend) {
   return _InterlockedIncrement16_nf(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedIncrement16_nf(i16*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i16* %Addend, i16 1 monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i16* %Addend, i16 1 monotonic, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i16 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1270,7 +1270,7 @@ long test_InterlockedIncrement_acq(long volatile *Addend) {
   return _InterlockedIncrement_acq(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedIncrement_acq(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 acquire, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1279,7 +1279,7 @@ long test_InterlockedIncrement_rel(long volatile *Addend) {
   return _InterlockedIncrement_rel(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedIncrement_rel(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 release
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 release, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1288,7 +1288,7 @@ long test_InterlockedIncrement_nf(long volatile *Addend) {
   return _InterlockedIncrement_nf(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedIncrement_nf(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i32* %Addend, i32 1 monotonic, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i32 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1297,7 +1297,7 @@ __int64 test_InterlockedIncrement64_acq(__int64 volatile *Addend) {
   return _InterlockedIncrement64_acq(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedIncrement64_acq(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 acquire, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i64 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1306,7 +1306,7 @@ __int64 test_InterlockedIncrement64_rel(__int64 volatile *Addend) {
   return _InterlockedIncrement64_rel(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedIncrement64_rel(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 release
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 release, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i64 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1315,7 +1315,7 @@ __int64 test_InterlockedIncrement64_nf(__int64 volatile *Addend) {
   return _InterlockedIncrement64_nf(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedIncrement64_nf(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw add i64* %Addend, i64 1 monotonic, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i64 [[TMP]], 1
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1324,7 +1324,7 @@ short test_InterlockedDecrement16_acq(short volatile *Addend) {
   return _InterlockedDecrement16_acq(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedDecrement16_acq(i16*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 acquire, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i16 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1333,7 +1333,7 @@ short test_InterlockedDecrement16_rel(short volatile *Addend) {
   return _InterlockedDecrement16_rel(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedDecrement16_rel(i16*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 release
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 release, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i16 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1342,7 +1342,7 @@ short test_InterlockedDecrement16_nf(short volatile *Addend) {
   return _InterlockedDecrement16_nf(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i16 @test_InterlockedDecrement16_nf(i16*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i16* %Addend, i16 1 monotonic, align 2
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i16 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i16 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1351,7 +1351,7 @@ long test_InterlockedDecrement_acq(long volatile *Addend) {
   return _InterlockedDecrement_acq(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedDecrement_acq(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 acquire, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1360,7 +1360,7 @@ long test_InterlockedDecrement_rel(long volatile *Addend) {
   return _InterlockedDecrement_rel(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedDecrement_rel(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 release
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 release, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1369,7 +1369,7 @@ long test_InterlockedDecrement_nf(long volatile *Addend) {
   return _InterlockedDecrement_nf(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i32 @test_InterlockedDecrement_nf(i32*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i32* %Addend, i32 1 monotonic, align 4
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i32 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1378,7 +1378,7 @@ __int64 test_InterlockedDecrement64_acq(__int64 volatile *Addend) {
   return _InterlockedDecrement64_acq(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedDecrement64_acq(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 acquire
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 acquire, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i64 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1387,7 +1387,7 @@ __int64 test_InterlockedDecrement64_rel(__int64 volatile *Addend) {
   return _InterlockedDecrement64_rel(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedDecrement64_rel(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 release
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 release, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i64 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }
@@ -1396,7 +1396,7 @@ __int64 test_InterlockedDecrement64_nf(__int64 volatile *Addend) {
   return _InterlockedDecrement64_nf(Addend);
 }
 // CHECK-ARM-ARM64: define{{.*}}i64 @test_InterlockedDecrement64_nf(i64*{{[a-z_ ]*}}%Addend){{.*}}{
-// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 monotonic
+// CHECK-ARM-ARM64: [[TMP:%[0-9]+]] = atomicrmw sub i64* %Addend, i64 1 monotonic, align 8
 // CHECK-ARM-ARM64: [[RESULT:%[0-9]+]] = add i64 [[TMP]], -1
 // CHECK-ARM-ARM64: ret i64 [[RESULT]]
 // CHECK-ARM-ARM64: }

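For reference, a minimal sketch (not part of the patch) of the source pattern the ARM64 checks above cover: the _acq/_rel/_nf suffix selects the ordering, and the updated checks now also pin the operand type's natural alignment. Assumes an AArch64 Windows target, where these intrinsic variants are available:

  #include <intrin.h>

  long demo(long volatile *p) {
    long a = _InterlockedIncrement_acq(p); // atomicrmw add i32* ... acquire, align 4
    long b = _InterlockedIncrement_rel(p); // atomicrmw add i32* ... release, align 4
    long c = _InterlockedIncrement_nf(p);  // atomicrmw add i32* ... monotonic, align 4
    return a + b + c;
  }
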
diff --git a/clang/test/CodeGen/ms-volatile.c b/clang/test/CodeGen/ms-volatile.c
index a3ef35a3faad..a7ea10902fd9 100644
--- a/clang/test/CodeGen/ms-volatile.c
+++ b/clang/test/CodeGen/ms-volatile.c
@@ -17,32 +17,32 @@ struct qux {
 void test1(struct foo *p, struct foo *q) {
   *p = *q;
   // CHECK-LABEL: @test1
-  // CHECK: load atomic volatile {{.*}} acquire
-  // CHECK: store atomic volatile {{.*}}, {{.*}} release
+  // CHECK: load atomic volatile {{.*}} acquire, align 4
+  // CHECK: store atomic volatile {{.*}}, {{.*}} release, align 4
 }
 void test2(volatile int *p, volatile int *q) {
   *p = *q;
   // CHECK-LABEL: @test2
-  // CHECK: load atomic volatile {{.*}} acquire
-  // CHECK: store atomic volatile {{.*}}, {{.*}} release
+  // CHECK: load atomic volatile {{.*}} acquire, align 4
+  // CHECK: store atomic volatile {{.*}}, {{.*}} release, align 4
 }
 void test3(struct foo *p, struct foo *q) {
   p->x = q->x;
   // CHECK-LABEL: @test3
-  // CHECK: load atomic volatile {{.*}} acquire
-  // CHECK: store atomic volatile {{.*}}, {{.*}} release
+  // CHECK: load atomic volatile {{.*}} acquire, align 4
+  // CHECK: store atomic volatile {{.*}}, {{.*}} release, align 4
 }
 void test4(volatile struct foo *p, volatile struct foo *q) {
   p->x = q->x;
   // CHECK-LABEL: @test4
-  // CHECK: load atomic volatile {{.*}} acquire
-  // CHECK: store atomic volatile {{.*}}, {{.*}} release
+  // CHECK: load atomic volatile {{.*}} acquire, align 4
+  // CHECK: store atomic volatile {{.*}}, {{.*}} release, align 4
 }
 void test5(volatile struct foo *p, volatile struct foo *q) {
   *p = *q;
   // CHECK-LABEL: @test5
-  // CHECK: load atomic volatile {{.*}} acquire
-  // CHECK: store atomic volatile {{.*}}, {{.*}} release
+  // CHECK: load atomic volatile {{.*}} acquire, align 4
+  // CHECK: store atomic volatile {{.*}}, {{.*}} release, align 4
 }
 void test6(struct bar *p, struct bar *q) {
   *p = *q;
@@ -53,35 +53,35 @@ void test6(struct bar *p, struct bar *q) {
 void test7(volatile struct bar *p, volatile struct bar *q) {
   *p = *q;
   // CHECK-LABEL: @test7
-  // CHECK: load atomic volatile {{.*}} acquire
-  // CHECK: store atomic volatile {{.*}}, {{.*}} release
+  // CHECK: load atomic volatile {{.*}} acquire, align 4
+  // CHECK: store atomic volatile {{.*}}, {{.*}} release, align 4
 }
 void test8(volatile double *p, volatile double *q) {
   *p = *q;
   // CHECK-LABEL: @test8
-  // CHECK: load volatile {{.*}}
-  // CHECK: store volatile {{.*}}, {{.*}}
+  // CHECK: load volatile {{.*}}, align 8
+  // CHECK: store volatile {{.*}}, {{.*}}, align 8
 }
 void test9(volatile baz *p, baz *q) {
   *p = *q;
   // CHECK-LABEL: @test9
-  // CHECK: store volatile {{.*}}, {{.*}}
-  // CHECK: store volatile {{.*}}, {{.*}}
+  // CHECK: store volatile {{.*}}, {{.*}}, align 8
+  // CHECK: store volatile {{.*}}, {{.*}}, align 4
 }
 void test10(volatile long long *p, volatile long long *q) {
   *p = *q;
   // CHECK-LABEL: @test10
-  // CHECK: load volatile {{.*}}
-  // CHECK: store volatile {{.*}}, {{.*}}
+  // CHECK: load volatile {{.*}}, align 8
+  // CHECK: store volatile {{.*}}, {{.*}}, align 8
 }
 void test11(volatile float *p, volatile float *q) {
   *p = *q;
   // CHECK-LABEL: @test11
-  // CHECK: load atomic volatile {{.*}} acquire
-  // CHECK: store atomic volatile {{.*}}, {{.*}} release
+  // CHECK: load atomic volatile {{.*}} acquire, align 4
+  // CHECK: store atomic volatile {{.*}}, {{.*}} release, align 4
 }
 int test12(struct qux *p) {
   return p->f;
   // CHECK-LABEL: @test12
-  // CHECK: load volatile {{.*}}
+  // CHECK: load volatile {{.*}}, align 1
 }

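These ms-volatile.c checks exercise MSVC /volatile:ms semantics: volatile accesses to small enough scalars are emitted as atomic acquire loads and release stores, while wider types on a 32-bit target (double, long long, as in test8/test10 above) stay plain volatile. A minimal sketch, assuming MS volatile semantics are in effect (clang-cl /volatile:ms or -fms-volatile):

  void publish(volatile int *flag) {
    *flag = 1;     // store atomic volatile i32 ..., ... release, align 4
  }

  int consume(volatile int *flag) {
    return *flag;  // load atomic volatile i32, ... acquire, align 4
  }
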
diff --git a/clang/test/CodeGen/pr45476.cpp b/clang/test/CodeGen/pr45476.cpp
index dd97013ae674..84e7a984a1a2 100644
--- a/clang/test/CodeGen/pr45476.cpp
+++ b/clang/test/CodeGen/pr45476.cpp
@@ -15,8 +15,7 @@ extern "C" void foo() {
   // LIBCALL-LABEL: @foo
   // LIBCALL: call void @__atomic_store
   // NATIVE-LABEL: @foo
-  // NATIVE: store atomic i32
+  // NATIVE: store atomic i32 {{.*}} seq_cst, align 4
 
   a = s3{1, 2, 3};
 }
-

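The pr45476.cpp change pins the ordering and alignment on the NATIVE path. A minimal sketch of the pattern under test, assuming s3 is the 3-char struct from PR45476 and using clang's _Atomic extension in C++: targets with native support widen the store to an i32, others fall back to the __atomic_store libcall.

  struct s3 { char a, b, c; };
  _Atomic s3 a;        // 3 bytes of payload, padded out to a 4-byte atomic

  extern "C" void foo() {
    a = s3{1, 2, 3};   // NATIVE:  store atomic i32 ... seq_cst, align 4
  }                    // LIBCALL: call void @__atomic_store
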
diff --git a/clang/test/CodeGenCXX/atomic-align.cpp b/clang/test/CodeGenCXX/atomic-align.cpp
index 9852ac38a6c8..624de9286f52 100644
--- a/clang/test/CodeGenCXX/atomic-align.cpp
+++ b/clang/test/CodeGenCXX/atomic-align.cpp
@@ -9,7 +9,7 @@ AM load1() {
   // m is declared to align to 8bytes, so generate load atomic instead
   // of libcall.
   // CHECK-LABEL: @_Z5load1v
-  // CHECK: load atomic {{.*}} monotonic
+  // CHECK: load atomic {{.*}} monotonic, align 8
   __atomic_load(&m, &am, 0);
   return am;
 }
@@ -24,7 +24,7 @@ AM load2() {
   // BM::f2 is declared to align to 8bytes, so generate load atomic instead
   // of libcall.
   // CHECK-LABEL: @_Z5load2v
-  // CHECK: load atomic {{.*}} monotonic
+  // CHECK: load atomic {{.*}} monotonic, align 8
   __atomic_load(&bm.f2, &am, 0);
   return am;
 }

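In atomic-align.cpp the declared alignment is what decides between an inline atomic and a libcall: an 8-byte object aligned to 8 bytes gets a native monotonic load. A minimal sketch, assuming a 64-bit target (memorder 0 is __ATOMIC_RELAXED, i.e. monotonic):

  struct AM { int f1, f2; };
  alignas(8) AM m;

  AM load1() {
    AM am;
    __atomic_load(&m, &am, 0);  // load atomic i64, ... monotonic, align 8
    return am;                  // (no call to @__atomic_load)
  }
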
diff --git a/clang/test/CodeGenCXX/atomic-inline.cpp b/clang/test/CodeGenCXX/atomic-inline.cpp
index fe727589d2e2..b8c3fc880d9b 100644
--- a/clang/test/CodeGenCXX/atomic-inline.cpp
+++ b/clang/test/CodeGenCXX/atomic-inline.cpp
@@ -9,9 +9,9 @@ AM8 m8;
 AM8 load8() {
   AM8 am;
   // CHECK-LABEL: @_Z5load8v
-  // CHECK: load atomic i64, {{.*}} monotonic
+  // CHECK: load atomic i64, {{.*}} monotonic, align 8
   // CORE2-LABEL: @_Z5load8v
-  // CORE2: load atomic i64, {{.*}} monotonic
+  // CORE2: load atomic i64, {{.*}} monotonic, align 8
   __atomic_load(&m8, &am, 0);
   return am;
 }
@@ -19,18 +19,18 @@ AM8 load8() {
 AM8 s8;
 void store8() {
   // CHECK-LABEL: @_Z6store8v
-  // CHECK: store atomic i64 {{.*}} monotonic
+  // CHECK: store atomic i64 {{.*}} monotonic, align 8
   // CORE2-LABEL: @_Z6store8v
-  // CORE2: store atomic i64 {{.*}} monotonic
+  // CORE2: store atomic i64 {{.*}} monotonic, align 8
   __atomic_store(&m8, &s8, 0);
 }
 
 bool cmpxchg8() {
   AM8 am;
   // CHECK-LABEL: @_Z8cmpxchg8v
-  // CHECK: cmpxchg i64* {{.*}} monotonic
+  // CHECK: cmpxchg i64* {{.*}} monotonic, align 8
   // CORE2-LABEL: @_Z8cmpxchg8v
-  // CORE2: cmpxchg i64* {{.*}} monotonic
+  // CORE2: cmpxchg i64* {{.*}} monotonic, align 8
   return __atomic_compare_exchange(&m8, &s8, &am, 0, 0, 0);
 }
 
@@ -44,7 +44,7 @@ AM16 load16() {
   // CHECK-LABEL: @_Z6load16v
   // CHECK: call void @__atomic_load
   // CORE2-LABEL: @_Z6load16v
-  // CORE2: load atomic i128, {{.*}} monotonic
+  // CORE2: load atomic i128, {{.*}} monotonic, align 16
   __atomic_load(&m16, &am, 0);
   return am;
 }
@@ -54,7 +54,7 @@ void store16() {
   // CHECK-LABEL: @_Z7store16v
   // CHECK: call void @__atomic_store
   // CORE2-LABEL: @_Z7store16v
-  // CORE2: store atomic i128 {{.*}} monotonic
+  // CORE2: store atomic i128 {{.*}} monotonic, align 16
   __atomic_store(&m16, &s16, 0);
 }
 
@@ -63,7 +63,6 @@ bool cmpxchg16() {
   // CHECK-LABEL: @_Z9cmpxchg16v
   // CHECK: call zeroext i1 @__atomic_compare_exchange
   // CORE2-LABEL: @_Z9cmpxchg16v
-  // CORE2: cmpxchg i128* {{.*}} monotonic
+  // CORE2: cmpxchg i128* {{.*}} monotonic monotonic, align 16
   return __atomic_compare_exchange(&m16, &s16, &am, 0, 0, 0);
 }
-

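The CHECK/CORE2 split in atomic-inline.cpp comes down to cmpxchg16b: 16-byte atomics are inlined only when the cx16 feature is available (-mcpu=core2 here); otherwise they go through the atomic libcalls. A minimal sketch, assuming x86-64 and a 16-byte-aligned 16-byte struct as in the test:

  struct alignas(16) AM16 { long x, y; };
  AM16 m16, s16;

  bool cmpxchg16() {
    AM16 am;
    // CORE2:           cmpxchg i128* ... monotonic monotonic, align 16
    // baseline x86-64: call zeroext i1 @__atomic_compare_exchange
    return __atomic_compare_exchange(&m16, &s16, &am, 0, 0, 0);
  }
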
diff --git a/clang/test/CodeGenCXX/atomic.cpp b/clang/test/CodeGenCXX/atomic.cpp
index 653f16d26309..f40cb6d6ad14 100644
--- a/clang/test/CodeGenCXX/atomic.cpp
+++ b/clang/test/CodeGenCXX/atomic.cpp
@@ -9,7 +9,7 @@ namespace PR11411 {
   // CHECK-NOT: ret
   template<typename _Tp> inline void Ptr<_Tp>::f() {
     int* _refcount;
-    // CHECK: atomicrmw add i32*
+    // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
     __sync_fetch_and_add(_refcount, 1);
     // CHECK-NEXT: ret void
   }

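The legacy __sync builtins always carry seq_cst ordering, which the updated check now spells out together with the alignment. A minimal sketch:

  int bump(int *refcount) {
    // atomicrmw add i32* ... seq_cst, align 4
    return __sync_fetch_and_add(refcount, 1);
  }
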
diff --git a/clang/test/CodeGenCXX/atomicinit.cpp b/clang/test/CodeGenCXX/atomicinit.cpp
index e5848f8b6254..8b2bec2fbbdf 100644
--- a/clang/test/CodeGenCXX/atomicinit.cpp
+++ b/clang/test/CodeGenCXX/atomicinit.cpp
@@ -15,7 +15,7 @@ struct A {
   void v(int j);
 };
 // Storing to atomic values should be atomic
-// CHECK: store atomic i32
+// CHECK: store atomic i32 {{.*}} seq_cst, align 4
 void A::v(int j) { i = j; }
 // Initialising atomic values should not be atomic
 // CHECK-NOT: store atomic 

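The point of atomicinit.cpp is the store/initialize distinction: assignment through an _Atomic member is an atomic seq_cst store, while initializing the same member in a constructor, before the object can be shared, is a plain store. A minimal sketch, assuming the member is _Atomic(int) as in the test (a clang extension in C++):

  struct A {
    _Atomic(int) i;
    A(int j) : i(j) {}        // plain 'store i32', not atomic
    void v(int j) { i = j; }  // store atomic i32 ... seq_cst, align 4
  };
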
diff --git a/clang/test/CodeGenCXX/cxx1z-decomposition.cpp b/clang/test/CodeGenCXX/cxx1z-decomposition.cpp
index 1ce408b7ba8c..d4b8203a634b 100644
--- a/clang/test/CodeGenCXX/cxx1z-decomposition.cpp
+++ b/clang/test/CodeGenCXX/cxx1z-decomposition.cpp
@@ -120,7 +120,7 @@ void test_bitfield(A &a) {
 // CHECK-LABEL: define {{.*}}@_Z18test_static_simple
 void test_static_simple() {
   static auto [x1, x2] = make<A>();
-  // CHECK: load atomic {{.*}}i64* @_ZGVZ18test_static_simplevEDC2x12x2E
+  // CHECK: load atomic i8, {{.*}}@_ZGVZ18test_static_simplevEDC2x12x2E{{.*}} acquire, align 8
   // CHECK: br i1
   // CHECK: @__cxa_guard_acquire(
   // CHECK: call {{.*}} @_Z4makeI1AERT_v(
@@ -138,14 +138,14 @@ int test_static_tuple() {
   static auto [x1, x2] = make<B>();
 
   // Initialization of the implied variable.
-  // CHECK: load atomic {{.*}} @_ZGVZ17test_static_tuplevEDC2x12x2E
+  // CHECK: load atomic i8, {{.*}}@_ZGVZ17test_static_tuplevEDC2x12x2E{{.*}} acquire, align 8
   // CHECK: br i1
   // CHECK: @__cxa_guard_acquire({{.*}} @_ZGVZ17test_static_tuplevEDC2x12x2E)
   // CHECK: call {{.*}} @_Z4makeI1BERT_v(
   // CHECK: @__cxa_guard_release({{.*}} @_ZGVZ17test_static_tuplevEDC2x12x2E)
 
   // Initialization of the secret 'x1' variable.
-  // CHECK: load atomic {{.*}} @_ZGVZ17test_static_tuplevE2x1
+  // CHECK: load atomic i8, {{.*}}@_ZGVZ17test_static_tuplevE2x1{{.*}} acquire, align 8
   // CHECK: br i1
   // CHECK: @__cxa_guard_acquire({{.*}} @_ZGVZ17test_static_tuplevE2x1)
   // CHECK: call {{.*}} @_Z3getILi0EEDa1B(
@@ -155,7 +155,7 @@ int test_static_tuple() {
   // CHECK: call void @__cxa_guard_release({{.*}} @_ZGVZ17test_static_tuplevE2x1)
 
   // Initialization of the secret 'x2' variable.
-  // CHECK: load atomic {{.*}} @_ZGVZ17test_static_tuplevE2x2
+  // CHECK: load atomic i8, {{.*}}@_ZGVZ17test_static_tuplevE2x2{{.*}} acquire, align 8
   // CHECK: br i1
   // CHECK: @__cxa_guard_acquire({{.*}} @_ZGVZ17test_static_tuplevE2x2)
   // CHECK: call {{.*}} @_Z3getILi1EEDa1B(

diff --git a/clang/test/CodeGenCXX/cxx1z-inline-variables.cpp b/clang/test/CodeGenCXX/cxx1z-inline-variables.cpp
index 13474402230c..a616e95a2be5 100644
--- a/clang/test/CodeGenCXX/cxx1z-inline-variables.cpp
+++ b/clang/test/CodeGenCXX/cxx1z-inline-variables.cpp
@@ -93,7 +93,7 @@ const int &yib = Y<int>::b;
 // CHECK-LABEL: define {{.*}}global_var_init
 // CHECK-NOT: comdat
 // CHECK-SAME: {{$}}
-// CHECK: load atomic {{.*}} acquire
+// CHECK: load atomic {{.*}} acquire, align
 // CHECK: br
 // CHECK: __cxa_guard_acquire(i64* @_ZGV1b)
 // CHECK: br

diff --git a/clang/test/CodeGenCXX/static-init-pnacl.cpp b/clang/test/CodeGenCXX/static-init-pnacl.cpp
index ba06420431e7..0d607369f178 100644
--- a/clang/test/CodeGenCXX/static-init-pnacl.cpp
+++ b/clang/test/CodeGenCXX/static-init-pnacl.cpp
@@ -9,6 +9,6 @@ int f();
 void g() {
   static int a = f();
 }
-// CHECK: [[LOAD:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ1gvE1a to i8*) acquire
+// CHECK: [[LOAD:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ1gvE1a to i8*) acquire, align 8
 // CHECK-NEXT: [[GUARD:%.*]] = icmp eq i8 [[LOAD]], 0
 // CHECK-NEXT: br i1 [[GUARD]]

diff --git a/clang/test/CodeGenCXX/static-init.cpp b/clang/test/CodeGenCXX/static-init.cpp
index 5ef8733c7c0d..0711539cde2a 100644
--- a/clang/test/CodeGenCXX/static-init.cpp
+++ b/clang/test/CodeGenCXX/static-init.cpp
@@ -109,14 +109,14 @@ namespace test2 {
     static int x = foo();
   }
   // CHECK-LABEL: define{{.*}} void @_ZN5test21BC2Ev
-  // CHECK:   load atomic i8, i8* bitcast (i64* @_ZGVZN5test21BC1EvE1x to i8*) acquire,
+  // CHECK:   load atomic i8, i8* bitcast (i64* @_ZGVZN5test21BC1EvE1x to i8*) acquire, align 8
   // CHECK:   call i32 @__cxa_guard_acquire(i64* @_ZGVZN5test21BC1EvE1x)
   // CHECK:   [[T0:%.*]] = call i32 @_ZN5test23fooEv()
   // CHECK:   store i32 [[T0]], i32* @_ZZN5test21BC1EvE1x,
   // CHECK:   call void @__cxa_guard_release(i64* @_ZGVZN5test21BC1EvE1x)
 
   // CHECK-LABEL: define{{.*}} void @_ZN5test21BC1Ev
-  // CHECK:   load atomic i8, i8* bitcast (i64* @_ZGVZN5test21BC1EvE1x to i8*) acquire,
+  // CHECK:   load atomic i8, i8* bitcast (i64* @_ZGVZN5test21BC1EvE1x to i8*) acquire, align 8
   // CHECK:   call i32 @__cxa_guard_acquire(i64* @_ZGVZN5test21BC1EvE1x)
   // CHECK:   [[T0:%.*]] = call i32 @_ZN5test23fooEv()
   // CHECK:   store i32 [[T0]], i32* @_ZZN5test21BC1EvE1x,
@@ -128,7 +128,7 @@ namespace test2 {
     static int y = foo();
   }
   // CHECK-LABEL: define{{.*}} void @_ZN5test21BD2Ev(
-  // CHECK:   load atomic i8, i8* bitcast (i64* @_ZGVZN5test21BD1EvE1y to i8*) acquire,
+  // CHECK:   load atomic i8, i8* bitcast (i64* @_ZGVZN5test21BD1EvE1y to i8*) acquire, align 8
   // CHECK:   call i32 @__cxa_guard_acquire(i64* @_ZGVZN5test21BD1EvE1y)
   // CHECK:   [[T0:%.*]] = call i32 @_ZN5test23fooEv()
   // CHECK:   store i32 [[T0]], i32* @_ZZN5test21BD1EvE1y,

diff --git a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
index f7ae625b536e..383348a8a13c 100644
--- a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
+++ b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp
@@ -11,7 +11,7 @@ S global;
 // FIXME: Do we really need thread-safe initialization here? We don't run
 // global ctors on multiple threads. (If we were to do so, we'd need thread-safe
 // init for B<int>::member and B<int>::inline_member too.)
-// CHECK: load atomic i8, i8* bitcast (i64* @_ZGV13inline_global to i8*) acquire,
+// CHECK: load atomic i8, i8* bitcast (i64* @_ZGV13inline_global to i8*) acquire, align 8
 // CHECK: icmp eq i8 {{.*}}, 0
 // CHECK: br i1
 // CHECK-NOT: !prof
@@ -36,7 +36,7 @@ struct A {
   static thread_local S thread_local_member;
 
   // CHECK-LABEL: define {{.*}}global_var_init
-  // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVN1A13inline_memberE to i8*) acquire,
+  // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVN1A13inline_memberE to i8*) acquire, align 8
   // CHECK: icmp eq i8 {{.*}}, 0
   // CHECK: br i1
   // CHECK-NOT: !prof
@@ -54,7 +54,7 @@ struct A {
 
 // CHECK-LABEL: define{{.*}} void @_Z1fv()
 void f() {
-  // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVZ1fvE12static_local to i8*) acquire,
+  // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVZ1fvE12static_local to i8*) acquire, align 8
   // CHECK: icmp eq i8 {{.*}}, 0
   // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_LOCAL:[0-9]*]]
   static S static_local;

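The five static-initialization tests above all check the same Itanium C++ ABI guard pattern, which now pins the acquire ordering and the guard variable's 8-byte alignment. A minimal sketch (the mangled guard name is illustrative):

  int f();

  int &lazy() {
    // fast path: load atomic i8, i8* bitcast (i64* @_ZGVZ4lazyvE1x to i8*) acquire, align 8
    //            icmp eq i8 ..., 0 ; br i1 ...
    // slow path: @__cxa_guard_acquire / @__cxa_guard_release around the call to f()
    static int x = f();
    return x;
  }
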
diff --git a/clang/test/CodeGenObjC/property-atomic-bool.m b/clang/test/CodeGenObjC/property-atomic-bool.m
index 77da129f6c04..b53e8322ab5a 100644
--- a/clang/test/CodeGenObjC/property-atomic-bool.m
+++ b/clang/test/CodeGenObjC/property-atomic-bool.m
@@ -1,21 +1,21 @@
 // RUN: %clang_cc1 -triple x86_64-apple-macosx10 -emit-llvm -x objective-c %s -o - | FileCheck %s
 
 // CHECK: define internal zeroext i1 @"\01-[A0 p]"(
-// CHECK:   %[[ATOMIC_LOAD:.*]] = load atomic i8, i8* %{{.*}} seq_cst
+// CHECK:   %[[ATOMIC_LOAD:.*]] = load atomic i8, i8* %{{.*}} seq_cst, align 1
 // CHECK:   %[[TOBOOL:.*]] = trunc i8 %[[ATOMIC_LOAD]] to i1
 // CHECK:   ret i1 %[[TOBOOL]]
 
 // CHECK: define internal void @"\01-[A0 setP:]"({{.*}} i1 zeroext {{.*}})
-// CHECK:   store atomic i8 %{{.*}}, i8* %{{.*}} seq_cst
+// CHECK:   store atomic i8 %{{.*}}, i8* %{{.*}} seq_cst, align 1
 // CHECK:   ret void
 
 // CHECK: define internal zeroext i1 @"\01-[A1 p]"(
-// CHECK:   %[[ATOMIC_LOAD:.*]] = load atomic i8, i8* %{{.*}} unordered
+// CHECK:   %[[ATOMIC_LOAD:.*]] = load atomic i8, i8* %{{.*}} unordered, align 1
 // CHECK:   %[[TOBOOL:.*]] = trunc i8 %load to i1
 // CHECK:   ret i1 %[[TOBOOL]]
 
 // CHECK: define internal void @"\01-[A1 setP:]"({{.*}} i1 zeroext %p)
-// CHECK:   store atomic i8 %{{.*}}, i8* %{{.*}} unordered
+// CHECK:   store atomic i8 %{{.*}}, i8* %{{.*}} unordered, align 1
 // CHECK:   ret void
 
 @interface A0

diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index 88f2e0d0ea4a..bd5a01c5434a 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -32,58 +32,58 @@ atomic_int j;
 
 void fi1(atomic_int *i) {
   // CHECK-LABEL: @fi1
-  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
 
-  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst, align 4
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
 
-  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
 }
 
 void fi2(atomic_int *i) {
   // CHECK-LABEL: @fi2
-  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
 }
 
 void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {
   // CHECK-LABEL: @test_addr
-  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);
 }
 
 void fi3(atomic_int *i, atomic_uint *ui) {
   // CHECK-LABEL: @fi3
-  // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
   x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
 }
 
 bool fi4(atomic_int *i) {
   // CHECK-LABEL: @fi4(
-  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire
+  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4
   // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
   // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
   // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -100,16 +100,16 @@ void fi5(atomic_int *i, int scope) {
   // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
   // CHECK-NEXT: ]
   // CHECK: [[opencl_workgroup]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
   // CHECK: br label %[[continue:.*]]
   // CHECK: [[opencl_device]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
   // CHECK: br label %[[continue]]
   // CHECK: [[opencl_allsvmdevices]]:
-  // CHECK: load atomic i32, i32* %{{.*}} seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
   // CHECK: br label %[[continue]]
   // CHECK: [[opencl_subgroup]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
   // CHECK: br label %[[continue]]
   // CHECK: [[continue]]:
   int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
@@ -141,47 +141,47 @@ void fi6(atomic_int *i, int order, int scope) {
   // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
   // CHECK-NEXT: ]
   // CHECK: [[MON_WG]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
   // CHECK: [[MON_DEV]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4
   // CHECK: [[MON_ALL]]:
-  // CHECK: load atomic i32, i32* %{{.*}} monotonic
+  // CHECK: load atomic i32, i32* %{{.*}} monotonic, align 4
   // CHECK: [[MON_SUB]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
   // CHECK: [[ACQ_WG]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire, align 4
   // CHECK: [[ACQ_DEV]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire, align 4
   // CHECK: [[ACQ_ALL]]:
-  // CHECK: load atomic i32, i32* %{{.*}} acquire
+  // CHECK: load atomic i32, i32* %{{.*}} acquire, align 4
   // CHECK: [[ACQ_SUB]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire, align 4
   // CHECK: [[SEQ_WG]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
   // CHECK: [[SEQ_DEV]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
   // CHECK: [[SEQ_ALL]]:
-  // CHECK: load atomic i32, i32* %{{.*}} seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
   // CHECK: [[SEQ_SUB]]:
-  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst
+  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
   int x = __opencl_atomic_load(i, order, scope);
 }
 
 float ff1(global atomic_float *d) {
   // CHECK-LABEL: @ff1
-  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
+  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic, align 4
   return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
 }
 
 void ff2(atomic_float *d) {
   // CHECK-LABEL: @ff2
-  // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release
+  // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4
   __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
 }
 
 float ff3(atomic_float *d) {
   // CHECK-LABEL: @ff3
-  // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst
+  // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst, align 4
   return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
 }
 
@@ -198,10 +198,10 @@ void atomic_init_foo()
 
 // CHECK-LABEL: @failureOrder
 void failureOrder(atomic_int *ptr, int *ptr2) {
-  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic
+  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4
   __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
 
-  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
+  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4
   __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
 }
 
@@ -243,35 +243,35 @@ void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
   // CHECK-NEXT: ]
 
   // CHECK: [[MONOTONIC_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} monotonic monotonic
+  // CHECK: cmpxchg {{.*}} monotonic monotonic, align 4
   // CHECK: br
 
   // CHECK: [[ACQUIRE_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} acquire monotonic
+  // CHECK: cmpxchg {{.*}} acquire monotonic, align 4
   // CHECK: br
 
   // CHECK: [[ACQUIRE_ACQUIRE]]
-  // CHECK: cmpxchg {{.*}} acquire acquire
+  // CHECK: cmpxchg {{.*}} acquire acquire, align 4
   // CHECK: br
 
   // CHECK: [[ACQREL_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} acq_rel monotonic
+  // CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4
   // CHECK: br
 
   // CHECK: [[ACQREL_ACQUIRE]]
-  // CHECK: cmpxchg {{.*}} acq_rel acquire
+  // CHECK: cmpxchg {{.*}} acq_rel acquire, align 4
   // CHECK: br
 
   // CHECK: [[SEQCST_MONOTONIC]]
-  // CHECK: cmpxchg {{.*}} seq_cst monotonic
+  // CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4
   // CHECK: br
 
   // CHECK: [[SEQCST_ACQUIRE]]
-  // CHECK: cmpxchg {{.*}} seq_cst acquire
+  // CHECK: cmpxchg {{.*}} seq_cst acquire, align 4
   // CHECK: br
 
   // CHECK: [[SEQCST_SEQCST]]
-  // CHECK: cmpxchg {{.*}} seq_cst seq_cst
+  // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4
   // CHECK: br
 }
 
@@ -281,7 +281,7 @@ int test_volatile(volatile atomic_int *i) {
   // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
   // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
   // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
-  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst
+  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst, align 4
   // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
   // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
   // CHECK-NEXT: ret i32 %[[retval]]

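The cmpxchg checks in atomic-ops.cl now carry both the success and the failure ordering plus the alignment. The same two-ordering shape is reachable from the plain C/C++ builtins; a minimal sketch:

  bool try_swap(int *p, int *expected, int desired) {
    // cmpxchg i32* ... acquire monotonic, align 4
    return __atomic_compare_exchange_n(p, expected, desired,
                                       /*weak=*/false,
                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
  }
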
diff --git a/clang/test/OpenMP/atomic_capture_codegen.cpp b/clang/test/OpenMP/atomic_capture_codegen.cpp
index 047ec3616afc..62e41c8f2693 100644
--- a/clang/test/OpenMP/atomic_capture_codegen.cpp
+++ b/clang/test/OpenMP/atomic_capture_codegen.cpp
@@ -83,27 +83,27 @@ float2 float2x;
 register int rix __asm__("esp");
 
 int main() {
-// CHECK: [[PREV:%.+]] = atomicrmw add i8* @{{.+}}, i8 1 monotonic
+// CHECK: [[PREV:%.+]] = atomicrmw add i8* @{{.+}}, i8 1 monotonic, align 1
 // CHECK: store i8 [[PREV]], i8* @{{.+}},
 #pragma omp atomic capture
   bv = bx++;
-// CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic
+// CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic, align 1
 // CHECK: add nsw i32 %{{.+}}, 1
 // CHECK: store i8 %{{.+}}, i8* @{{.+}},
 #pragma omp atomic capture
   cv = ++cx;
-// CHECK: [[PREV:%.+]] = atomicrmw sub i8* @{{.+}}, i8 1 monotonic
+// CHECK: [[PREV:%.+]] = atomicrmw sub i8* @{{.+}}, i8 1 monotonic, align 1
 // CHECK: store i8 [[PREV]], i8* @{{.+}},
 #pragma omp atomic capture
   ucv = ucx--;
-// CHECK: atomicrmw sub i16* @{{.+}}, i16 1 monotonic
+// CHECK: atomicrmw sub i16* @{{.+}}, i16 1 monotonic, align 2
 // CHECK: sub nsw i32 %{{.+}}, 1
 // CHECK: store i16 %{{.+}}, i16* @{{.+}},
 #pragma omp atomic capture
   sv = --sx;
 // CHECK: [[USV:%.+]] = load i16, i16* @{{.+}},
 // CHECK: [[EXPR:%.+]] = zext i16 [[USV]] to i32
-// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic, align 2
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i16 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -112,7 +112,7 @@ int main() {
 // CHECK: [[DESIRED_CALC:%.+]] = trunc i32 [[ADD]] to i16
 // CHECK: store i16 [[DESIRED_CALC]], i16* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i16, i16* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic, align 2
 // CHECK: [[OLD_X]] = extractvalue { i16, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i16, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -121,14 +121,14 @@ int main() {
 #pragma omp atomic capture
   sv = usx += usv;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED_CALC:%.+]] = mul nsw i32 [[EXPECTED]], [[EXPR]]
 // CHECK: store i32 [[DESIRED_CALC]], i32* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -137,19 +137,19 @@ int main() {
 #pragma omp atomic capture
   uiv = ix *= iv;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: [[PREV:%.+]] = atomicrmw sub i32* @{{.+}}, i32 [[EXPR]] monotonic
+// CHECK: [[PREV:%.+]] = atomicrmw sub i32* @{{.+}}, i32 [[EXPR]] monotonic, align 4
 // CHECK: store i32 [[PREV]], i32* @{{.+}},
 #pragma omp atomic capture
   {iv = uix; uix -= uiv;}
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED_CALC:%.+]] = shl i32 [[EXPECTED]], [[EXPR]]
 // CHECK: store i32 [[DESIRED_CALC]], i32* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -158,14 +158,14 @@ int main() {
 #pragma omp atomic capture
   {ix <<= iv; uiv = ix;}
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED_CALC:%.+]] = lshr i32 [[EXPECTED]], [[EXPR]]
 // CHECK: store i32 [[DESIRED_CALC]], i32* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -174,14 +174,14 @@ int main() {
 #pragma omp atomic capture
   iv = uix >>= uiv;
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i64, i64* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64* [[X_ADDR:@.+]] monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED:%.+]] = sdiv i64 [[EXPECTED]], [[EXPR]]
 // CHECK: store i64 [[DESIRED]], i64* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* [[X_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* [[X_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -190,25 +190,25 @@ int main() {
 #pragma omp atomic capture
   {ulv = lx; lx /= lv;}
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: [[OLD:%.+]] = atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: [[OLD:%.+]] = atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 // CHECK: [[DESIRED:%.+]] = and i64 [[OLD]], [[EXPR]]
 // CHECK:  store i64 [[DESIRED]], i64* @{{.+}},
 #pragma omp atomic capture
   {ulx &= ulv; lv = ulx;}
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: [[OLD:%.+]] = atomicrmw xor i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: [[OLD:%.+]] = atomicrmw xor i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 // CHECK: [[DESIRED:%.+]] = xor i64 [[OLD]], [[EXPR]]
 // CHECK:  store i64 [[DESIRED]], i64* @{{.+}},
 #pragma omp atomic capture
   ullv = llx ^= llv;
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: [[OLD:%.+]] = atomicrmw or i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: [[OLD:%.+]] = atomicrmw or i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 // CHECK: [[DESIRED:%.+]] = or i64 [[OLD]], [[EXPR]]
 // CHECK:  store i64 [[DESIRED]], i64* @{{.+}},
 #pragma omp atomic capture
   llv = ullx |= ullv;
 // CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -217,7 +217,7 @@ int main() {
 // CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
 // CHECK: store float [[ADD]], float* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -227,7 +227,7 @@ int main() {
 #pragma omp atomic capture
   dv = fx = fx + fv;
 // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -236,7 +236,7 @@ int main() {
 // CHECK: [[SUB:%.+]] = fsub double [[EXPR]], [[OLD]]
 // CHECK: store double [[SUB]], double* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -246,7 +246,7 @@ int main() {
 #pragma omp atomic capture
   {fv = dx; dx = dv - dx;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic
+// CHECK: [[X:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic, align 16
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i128 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -258,7 +258,7 @@ int main() {
 // CHECK: [[MUL:%.+]] = fmul x86_fp80 [[OLD]], [[EXPR]]
 // CHECK: store x86_fp80 [[MUL]], x86_fp80* [[TEMP]]
 // CHECK: [[DESIRED:%.+]] = load i128, i128* [[BITCAST]]
-// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic, align 16
 // CHECK: [[OLD_X:%.+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -349,14 +349,14 @@ int main() {
 // CHECK: [[BV:%.+]] = load i8, i8* @{{.+}}
 // CHECK: [[BOOL:%.+]] = trunc i8 [[BV]] to i1
 // CHECK: [[EXPR:%.+]] = zext i1 [[BOOL]] to i64
-// CHECK: [[OLD:%.+]] = atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: [[OLD:%.+]] = atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 // CHECK: [[DESIRED:%.+]] = and i64 [[OLD]], [[EXPR]]
 // CHECK: store i64 [[DESIRED]], i64* @{{.+}},
 #pragma omp atomic capture
   ulv = ulx = ulx & bv;
 // CHECK: [[CV:%.+]]  = load i8, i8* @{{.+}}, align 1
 // CHECK: [[EXPR:%.+]] = sext i8 [[CV]] to i32
-// CHECK: [[X:%.+]] = load atomic i8, i8* [[BX_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i8, i8* [[BX_ADDR:@.+]] monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -367,7 +367,7 @@ int main() {
 // CHECK: [[NEW:%.+]] = zext i1 [[CAST]] to i8
 // CHECK: store i8 [[NEW]], i8* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[BX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[BX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic, align 1
 // CHECK: [[OLD:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -378,7 +378,7 @@ int main() {
   {bv = bx; bx = cv & bx;}
 // CHECK: [[UCV:%.+]]  = load i8, i8* @{{.+}},
 // CHECK: [[EXPR:%.+]] = zext i8 [[UCV]] to i32
-// CHECK: [[X:%.+]] = load atomic i8, i8* [[CX_ADDR:@.+]] seq_cst
+// CHECK: [[X:%.+]] = load atomic i8, i8* [[CX_ADDR:@.+]] seq_cst, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -387,7 +387,7 @@ int main() {
 // CHECK: [[NEW:%.+]] = trunc i32 [[ASHR]] to i8
 // CHECK: store i8 [[NEW]], i8* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[CX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] seq_cst seq_cst
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[CX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] seq_cst seq_cst, align 1
 // CHECK: [[OLD_X:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -398,7 +398,7 @@ int main() {
   {cx = cx >> ucv; cv = cx;}
 // CHECK: [[SV:%.+]]  = load i16, i16* @{{.+}},
 // CHECK: [[EXPR:%.+]] = sext i16 [[SV]] to i32
-// CHECK: [[X:%.+]] = load atomic i64, i64* [[ULX_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64* [[ULX_ADDR:@.+]] monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -407,7 +407,7 @@ int main() {
 // CHECK: [[NEW:%.+]] = sext i32 [[SHL]] to i64
 // CHECK: store i64 [[NEW]], i64* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* [[ULX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* [[ULX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -417,14 +417,14 @@ int main() {
   ulv = ulx = sv << ulx;
 // CHECK: [[USV:%.+]]  = load i16, i16* @{{.+}},
 // CHECK: [[EXPR:%.+]] = zext i16 [[USV]] to i64
-// CHECK: [[X:%.+]] = load atomic i64, i64* [[LX_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64* [[LX_ADDR:@.+]] monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED:%.+]] = srem i64 [[EXPECTED]], [[EXPR]]
 // CHECK: store i64 [[DESIRED]], i64* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* [[LX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* [[LX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -433,14 +433,14 @@ int main() {
 #pragma omp atomic capture
   {lv = lx; lx = lx % usv;}
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}}
-// CHECK: [[OLD:%.+]] = atomicrmw or i32* @{{.+}}, i32 [[EXPR]] seq_cst
+// CHECK: [[OLD:%.+]] = atomicrmw or i32* @{{.+}}, i32 [[EXPR]] seq_cst, align 4
 // CHECK: [[DESIRED:%.+]] = or i32 [[EXPR]], [[OLD]]
 // CHECK: store i32 [[DESIRED]], i32* @{{.+}},
 // CHECK: call{{.*}} @__kmpc_flush(
 #pragma omp atomic seq_cst, capture
   {uix = iv | uix; uiv = uix;}
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}}
-// CHECK: [[OLD:%.+]] = atomicrmw and i32* @{{.+}}, i32 [[EXPR]] monotonic
+// CHECK: [[OLD:%.+]] = atomicrmw and i32* @{{.+}}, i32 [[EXPR]] monotonic, align 4
 // CHECK: [[DESIRED:%.+]] = and i32 [[OLD]], [[EXPR]]
 // CHECK: store i32 [[DESIRED]], i32* @{{.+}},
 #pragma omp atomic capture
@@ -470,7 +470,7 @@ int main() {
   {civ = cix; cix = lv + cix;}
 // CHECK: [[ULV:%.+]] = load i64, i64* @{{.+}},
 // CHECK: [[EXPR:%.+]] = uitofp i64 [[ULV]] to float
-// CHECK: [[X:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -479,7 +479,7 @@ int main() {
 // CHECK: [[MUL:%.+]] = fmul float [[OLD]], [[EXPR]]
 // CHECK: store float [[MUL]], float* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -489,7 +489,7 @@ int main() {
   {fx = fx * ulv; fv = fx;}
 // CHECK: [[LLV:%.+]] = load i64, i64* @{{.+}},
 // CHECK: [[EXPR:%.+]] = sitofp i64 [[LLV]] to double
-// CHECK: [[X:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -498,7 +498,7 @@ int main() {
 // CHECK: [[DIV:%.+]] = fdiv double [[OLD]], [[EXPR]]
 // CHECK: store double [[DIV]], double* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -508,7 +508,7 @@ int main() {
   dv = dx /= llv;
 // CHECK: [[ULLV:%.+]] = load i64, i64* @{{.+}},
 // CHECK: [[EXPR:%.+]] = uitofp i64 [[ULLV]] to x86_fp80
-// CHECK: [[X:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic
+// CHECK: [[X:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic, align 16
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i128 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -520,7 +520,7 @@ int main() {
 // CHECK: [[SUB:%.+]] = fsub x86_fp80 [[OLD]], [[EXPR]]
 // CHECK: store x86_fp80 [[SUB]], x86_fp80* [[TEMP1]]
 // CHECK: [[DESIRED:%.+]] = load i128, i128* [[TEMP_I1]]
-// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic, align 16
 // CHECK: [[OLD_X:%.+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -552,7 +552,7 @@ int main() {
 #pragma omp atomic capture
   {cix = fv / cix; civ = cix;}
 // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic, align 2
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i16 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -562,7 +562,7 @@ int main() {
 // CHECK: [[NEW:%.+]] = fptosi double [[ADD]] to i16
 // CHECK: store i16 [[NEW]], i16* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i16, i16* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic, align 2
 // CHECK: [[OLD_X]] = extractvalue { i16, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i16, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -571,7 +571,7 @@ int main() {
 #pragma omp atomic capture
   sv = sx = sx + dv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}},
-// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic
+// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[XI8]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -583,7 +583,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = zext i1 [[BOOL_DESIRED]] to i8
 // CHECK: store i8 [[DESIRED]], i8* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic, align 1
 // CHECK: [[OLD_X:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -594,7 +594,7 @@ int main() {
   {bv = bx; bx = ldv * bx;}
 // CHECK: [[EXPR_RE:%.+]] = load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* [[CIV_ADDR:@.+]], i32 0, i32 0),
 // CHECK: [[EXPR_IM:%.+]] = load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* [[CIV_ADDR]], i32 0, i32 1),
-// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic
+// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[XI8]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -608,7 +608,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = zext i1 [[BOOL_DESIRED]] to i8
 // CHECK: store i8 [[DESIRED]], i8* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic, align 1
 // CHECK: [[OLD_X:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -620,7 +620,7 @@ int main() {
 // CHECK: [[IDX:%.+]] = load i16, i16* @{{.+}}
 // CHECK: load i8, i8*
 // CHECK: [[VEC_ITEM_VAL:%.+]] = zext i1 %{{.+}} to i32
-// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) monotonic
+// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) monotonic, align 16
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_I128:%.+]] = phi i128 [ [[I128VAL]], %{{.+}} ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -635,7 +635,7 @@ int main() {
 // CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <4 x i32> [[VEC_VAL]], i32 [[OR]], i16 [[IDX]]
 // CHECK: store <4 x i32> [[NEW_VEC_VAL]], <4 x i32>* [[TEMP]]
 // CHECK: [[NEW_I128:%.+]] = load i128, i128* [[TEMP_I]],
-// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] monotonic monotonic, align 16
 // CHECK: [[FAILED_OLD_VAL:%.+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -644,7 +644,7 @@ int main() {
 #pragma omp atomic capture
   {int4x[sv] |= bv; iv = int4x[sv];}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -662,7 +662,7 @@ int main() {
 // CHECK: [[BF_SET:%.+]] = or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 [[BF_SET]], i32* [[TEMP1]],
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[TEMP1]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -699,7 +699,7 @@ int main() {
 #pragma omp atomic capture
   {iv = bfx_packed.a; bfx_packed.a *= ldv;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -717,7 +717,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -726,7 +726,7 @@ int main() {
 #pragma omp atomic capture
   {bfx2.a -= ldv; iv = bfx2.a;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -748,7 +748,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[BITCAST_NEW]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[BITCAST_NEW]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -757,7 +757,7 @@ int main() {
 #pragma omp atomic capture
   iv = bfx2_packed.a = ldv / bfx2_packed.a;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -776,7 +776,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -817,7 +817,7 @@ int main() {
 #pragma omp atomic capture
   {bfx3_packed.a += ldv; iv = bfx3_packed.a;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -838,7 +838,7 @@ int main() {
 // CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i64 %{{.+}}, i64* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic, align 8
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -847,7 +847,7 @@ int main() {
 #pragma omp atomic relaxed capture
   iv = bfx4.a = bfx4.a * ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %{{.+}} ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -869,7 +869,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[BITCAST1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[BITCAST1]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -878,7 +878,7 @@ int main() {
 #pragma omp atomic capture relaxed
   {iv = bfx4_packed.a; bfx4_packed.a -= ldv;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -897,7 +897,7 @@ int main() {
 // CHECK: [[VAL:%.+]] = or i64 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i64 [[VAL]], i64* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] release monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] release monotonic, align 8
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -908,7 +908,7 @@ int main() {
 #pragma omp atomic capture release
   {bfx4.b /= ldv; iv = bfx4.b;}
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) acquire
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) acquire, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -930,7 +930,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[BITCAST1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[BITCAST1]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] acquire acquire
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] acquire acquire, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -942,7 +942,7 @@ int main() {
   iv = bfx4_packed.b += ldv;
 // CHECK: load i64, i64*
 // CHECK: [[EXPR:%.+]] = uitofp i64 %{{.+}} to float
-// CHECK: [[I64VAL:%.+]] = load atomic i64, i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) acquire
+// CHECK: [[I64VAL:%.+]] = load atomic i64, i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) acquire, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_I64:%.+]] = phi i64 [ [[I64VAL]], %{{.+}} ], [ [[FAILED_I64_OLD_VAL:%.+]], %[[CONT]] ]
@@ -957,7 +957,7 @@ int main() {
 // CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <2 x float> [[VEC_VAL]], float [[VEC_ITEM_VAL]], i64 0
 // CHECK: store <2 x float> [[NEW_VEC_VAL]], <2 x float>* [[LDTEMP1]]
 // CHECK: [[NEW_I64:%.+]] = load i64, i64* [[BITCAST]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] acq_rel acquire
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] acq_rel acquire, align 8
 // CHECK: [[FAILED_I64_OLD_VAL:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -976,7 +976,7 @@ int main() {
 // CHECK: call{{.*}} @__kmpc_flush(
 #pragma omp atomic capture seq_cst
   {rix = dv / rix; iv = rix;}
-// CHECK: [[OLD_VAL:%.+]] = atomicrmw xchg i32* @{{.+}}, i32 5 monotonic
+// CHECK: [[OLD_VAL:%.+]] = atomicrmw xchg i32* @{{.+}}, i32 5 monotonic, align 4
 // CHECK: call void @llvm.write_register.i32([[REG]], i32 [[OLD_VAL]])
 #pragma omp atomic capture
   {rix = ix; ix = 5;}

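(Illustrative note, not part of the commit: the capture tests above all exercise one C pattern, sketched minimally below with hypothetical names, assuming -fopenmp on x86-64. For a float capture, Clang emits a load-atomic/cmpxchg retry loop, and the cmpxchg now carries the ", align 4" suffix that the updated CHECK lines pin down.)

    float fx;

    float capture_mul(float v) {
      float old;
    #pragma omp atomic capture
      { old = fx; fx = fx * v; } /* lowers to: cmpxchg i32* ... monotonic monotonic, align 4 */
      return old;
    }
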
diff --git a/clang/test/OpenMP/atomic_codegen.cpp b/clang/test/OpenMP/atomic_codegen.cpp
index e8c533e484f7..47ab0b4e1804 100644
--- a/clang/test/OpenMP/atomic_codegen.cpp
+++ b/clang/test/OpenMP/atomic_codegen.cpp
@@ -27,7 +27,7 @@ void parallel_atomic_ewc() {
   {
       // CHECK: invoke void @_ZN2StC1Ev(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR:%.+]])
       // CHECK: [[SCALAR_ADDR:%.+]] = invoke nonnull align 4 dereferenceable(4) i32* @_ZN2St3getEv(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR]])
-      // CHECK: [[SCALAR_VAL:%.+]] = load atomic i32, i32* [[SCALAR_ADDR]] monotonic
+      // CHECK: [[SCALAR_VAL:%.+]] = load atomic i32, i32* [[SCALAR_ADDR]] monotonic, align 4
       // CHECK: store i32 [[SCALAR_VAL]], i32* @b
       // CHECK98: invoke void @_ZN2StD1Ev(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR]])
       // CHECK11: call void @_ZN2StD1Ev(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR]])
@@ -36,21 +36,21 @@ void parallel_atomic_ewc() {
       // CHECK-DAG: invoke void @_ZN2StC1Ev(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR:%.+]])
       // CHECK-DAG: [[SCALAR_ADDR:%.+]] = invoke nonnull align 4 dereferenceable(4) i32* @_ZN2St3getEv(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR]])
       // CHECK-DAG: [[B_VAL:%.+]] = load i32, i32* @b
-      // CHECK: store atomic i32 [[B_VAL]], i32* [[SCALAR_ADDR]] monotonic
+      // CHECK: store atomic i32 [[B_VAL]], i32* [[SCALAR_ADDR]] monotonic, align 4
       // CHECK: {{invoke|call}} void @_ZN2StD1Ev(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR]])
 #pragma omp atomic write
       St().get() = b;
       // CHECK: invoke void @_ZN2StC1Ev(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR:%.+]])
       // CHECK: [[SCALAR_ADDR:%.+]] = invoke nonnull align 4 dereferenceable(4) i32* @_ZN2St3getEv(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR]])
       // CHECK: [[B_VAL:%.+]] = load i32, i32* @b
-      // CHECK: [[OLD_VAL:%.+]] = load atomic i32, i32* [[SCALAR_ADDR]] monotonic,
+      // CHECK: [[OLD_VAL:%.+]] = load atomic i32, i32* [[SCALAR_ADDR]] monotonic, align 4
       // CHECK: br label %[[OMP_UPDATE:.+]]
       // CHECK: [[OMP_UPDATE]]
       // CHECK: [[OLD_PHI_VAL:%.+]] = phi i32 [ [[OLD_VAL]], %{{.+}} ], [ [[NEW_OLD_VAL:%.+]], %[[OMP_UPDATE]] ]
       // CHECK: [[NEW_VAL:%.+]] = srem i32 [[OLD_PHI_VAL]], [[B_VAL]]
       // CHECK: store i32 [[NEW_VAL]], i32* [[TEMP:%.+]],
       // CHECK: [[NEW_VAL:%.+]] = load i32, i32* [[TEMP]],
-      // CHECK: [[RES:%.+]] = cmpxchg i32* [[SCALAR_ADDR]], i32 [[OLD_PHI_VAL]], i32 [[NEW_VAL]] monotonic monotonic
+      // CHECK: [[RES:%.+]] = cmpxchg i32* [[SCALAR_ADDR]], i32 [[OLD_PHI_VAL]], i32 [[NEW_VAL]] monotonic monotonic, align 4
       // CHECK: [[NEW_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
       // CHECK: [[COND:%.+]] = extractvalue { i32, i1 } [[RES]], 1
       // CHECK: br i1 [[COND]], label %[[OMP_DONE:.+]], label %[[OMP_UPDATE]]
@@ -63,14 +63,14 @@ void parallel_atomic_ewc() {
       // CHECK: invoke void @_ZN2StC1Ev(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR:%.+]])
       // CHECK: [[SCALAR_ADDR:%.+]] = invoke nonnull align 4 dereferenceable(4) i32* @_ZN2St3getEv(%struct.St* {{[^,]*}} [[TEMP_ST_ADDR]])
       // CHECK: [[B_VAL:%.+]] = load i32, i32* @b
-      // CHECK: [[OLD_VAL:%.+]] = load atomic i32, i32* [[SCALAR_ADDR]] monotonic,
+      // CHECK: [[OLD_VAL:%.+]] = load atomic i32, i32* [[SCALAR_ADDR]] monotonic, align 4
       // CHECK: br label %[[OMP_UPDATE:.+]]
       // CHECK: [[OMP_UPDATE]]
       // CHECK: [[OLD_PHI_VAL:%.+]] = phi i32 [ [[OLD_VAL]], %{{.+}} ], [ [[NEW_OLD_VAL:%.+]], %[[OMP_UPDATE]] ]
       // CHECK: [[NEW_CALC_VAL:%.+]] = srem i32 [[OLD_PHI_VAL]], [[B_VAL]]
       // CHECK: store i32 [[NEW_CALC_VAL]], i32* [[TEMP:%.+]],
       // CHECK: [[NEW_VAL:%.+]] = load i32, i32* [[TEMP]],
-      // CHECK: [[RES:%.+]] = cmpxchg i32* [[SCALAR_ADDR]], i32 [[OLD_PHI_VAL]], i32 [[NEW_VAL]] monotonic monotonic
+      // CHECK: [[RES:%.+]] = cmpxchg i32* [[SCALAR_ADDR]], i32 [[OLD_PHI_VAL]], i32 [[NEW_VAL]] monotonic monotonic, align 4
       // CHECK: [[NEW_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
       // CHECK: [[COND:%.+]] = extractvalue { i32, i1 } [[RES]], 1
       // CHECK: br i1 [[COND]], label %[[OMP_DONE:.+]], label %[[OMP_UPDATE]]
@@ -92,28 +92,28 @@ void parallel_atomic() {
     // TERM_DEBUG-NOT: __kmpc_global_thread_num
     // TERM_DEBUG:     invoke {{.*}}foo{{.*}}()
     // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
-    // TERM_DEBUG:     load atomic i32, i32* @{{.+}} monotonic, {{.*}}!dbg [[READ_LOC:![0-9]+]]
+    // TERM_DEBUG:     load atomic i32, i32* @{{.+}} monotonic, align 4, !dbg [[READ_LOC:![0-9]+]]
     foo() = a;
 #pragma omp atomic write
     // TERM_DEBUG-NOT: __kmpc_global_thread_num
     // TERM_DEBUG:     invoke {{.*}}foo{{.*}}()
     // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
     // TERM_DEBUG-NOT: __kmpc_global_thread_num
-    // TERM_DEBUG:     store atomic i32 {{%.+}}, i32* @{{.+}} monotonic, {{.*}}!dbg [[WRITE_LOC:![0-9]+]]
+    // TERM_DEBUG:     store atomic i32 {{%.+}}, i32* @{{.+}} monotonic, align 4, !dbg [[WRITE_LOC:![0-9]+]]
     a = foo();
 #pragma omp atomic update
     // TERM_DEBUG-NOT: __kmpc_global_thread_num
     // TERM_DEBUG:     invoke {{.*}}foo{{.*}}()
     // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
     // TERM_DEBUG-NOT: __kmpc_global_thread_num
-    // TERM_DEBUG:     atomicrmw add i32* @{{.+}}, i32 %{{.+}} monotonic, {{.*}}!dbg [[UPDATE_LOC:![0-9]+]]
+    // TERM_DEBUG:     atomicrmw add i32* @{{.+}}, i32 %{{.+}} monotonic, align 4, !dbg [[UPDATE_LOC:![0-9]+]]
     a += foo();
 #pragma omp atomic capture
     // TERM_DEBUG-NOT: __kmpc_global_thread_num
     // TERM_DEBUG:     invoke {{.*}}foo{{.*}}()
     // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
     // TERM_DEBUG-NOT: __kmpc_global_thread_num
-    // TERM_DEBUG:     [[OLD_VAL:%.+]] = atomicrmw add i32* @{{.+}}, i32 %{{.+}} monotonic, {{.*}}!dbg [[CAPTURE_LOC:![0-9]+]]
+    // TERM_DEBUG:     [[OLD_VAL:%.+]] = atomicrmw add i32* @{{.+}}, i32 %{{.+}} monotonic, align 4, !dbg [[CAPTURE_LOC:![0-9]+]]
     // TERM_DEBUG:     store i32 [[OLD_VAL]], i32* @b,
     {b = a; a += foo(); }
   }

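(Illustrative note: not every atomic update needs the cmpxchg loop. When the operation maps directly onto an LLVM atomicrmw, as in the simple integer update sketched below with hypothetical names, Clang emits a single instruction, and the CHECKs in these files now also match its alignment, e.g. "atomicrmw add i32* @a, i32 %{{.+}} monotonic, align 4".)

    int a;

    void update_add(int v) {
    #pragma omp atomic update
      a += v; /* lowers to a single atomicrmw add, no retry loop */
    }
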
diff --git a/clang/test/OpenMP/atomic_read_codegen.c b/clang/test/OpenMP/atomic_read_codegen.c
index 211ddca3449d..ce9d5527a81c 100644
--- a/clang/test/OpenMP/atomic_read_codegen.c
+++ b/clang/test/OpenMP/atomic_read_codegen.c
@@ -82,62 +82,63 @@ float2 float2x;
 // register int rix __asm__("0");
 register int rix __asm__("esp");
 
+// CHECK-LABEL: @main(
 int main() {
-// CHECK: load atomic i8, i8*
+// CHECK: load atomic i8, i8* {{.*}} monotonic, align 1
 // CHECK: store i8
 #pragma omp atomic read
   bv = bx;
-// CHECK: load atomic i8, i8*
+// CHECK: load atomic i8, i8* {{.*}} monotonic, align 1
 // CHECK: store i8
 #pragma omp atomic read
   cv = cx;
-// CHECK: load atomic i8, i8*
+// CHECK: load atomic i8, i8* {{.*}} monotonic, align 1
 // CHECK: store i8
 #pragma omp atomic read
   ucv = ucx;
-// CHECK: load atomic i16, i16*
+// CHECK: load atomic i16, i16* {{.*}} monotonic, align 2
 // CHECK: store i16
 #pragma omp atomic read
   sv = sx;
-// CHECK: load atomic i16, i16*
+// CHECK: load atomic i16, i16* {{.*}} monotonic, align 2
 // CHECK: store i16
 #pragma omp atomic read
   usv = usx;
-// CHECK: load atomic i32, i32*
+// CHECK: load atomic i32, i32* {{.*}} monotonic, align 4
 // CHECK: store i32
 #pragma omp atomic read
   iv = ix;
-// CHECK: load atomic i32, i32*
+// CHECK: load atomic i32, i32* {{.*}} monotonic, align 4
 // CHECK: store i32
 #pragma omp atomic read
   uiv = uix;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i64
 #pragma omp atomic read
   lv = lx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i64
 #pragma omp atomic read
   ulv = ulx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i64
 #pragma omp atomic read
   llv = llx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i64
 #pragma omp atomic read
   ullv = ullx;
-// CHECK: load atomic i32, i32* bitcast (float*
+// CHECK: load atomic i32, i32* bitcast (float* {{.*}} monotonic, align 4
 // CHECK: bitcast i32 {{.*}} to float
 // CHECK: store float
 #pragma omp atomic read
   fv = fx;
-// CHECK: load atomic i64, i64* bitcast (double*
+// CHECK: load atomic i64, i64* bitcast (double* {{.*}} monotonic, align 8
 // CHECK: bitcast i64 {{.*}} to double
 // CHECK: store double
 #pragma omp atomic read
   dv = dx;
-// CHECK: [[LD:%.+]] = load atomic i128, i128* bitcast (x86_fp80*
+// CHECK: [[LD:%.+]] = load atomic i128, i128* bitcast (x86_fp80* {{.*}} monotonic, align 16
 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
 // CHECK: store i128 [[LD]], i128* [[BITCAST]]
 // CHECK: [[LD:%.+]] = load x86_fp80, x86_fp80* [[LDTEMP]]
@@ -160,33 +161,33 @@ int main() {
 // CHECK: store double
 #pragma omp atomic seq_cst read
   cdv = cdx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i8
 #pragma omp atomic read
   bv = ulx;
-// CHECK: load atomic i8, i8*
+// CHECK: load atomic i8, i8* {{.*}} monotonic, align 1
 // CHECK: store i8
 #pragma omp atomic read
   cv = bx;
-// CHECK: load atomic i8, i8*
+// CHECK: load atomic i8, i8* {{.*}} seq_cst, align 1
 // CHECK: call{{.*}} @__kmpc_flush(
 // CHECK: store i8
-#pragma omp atomic read, seq_cst
+#pragma omp atomic read seq_cst
   ucv = cx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i16
 #pragma omp atomic read
   sv = ulx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i16
 #pragma omp atomic read
   usv = lx;
-// CHECK: load atomic i32, i32*
+// CHECK: load atomic i32, i32* {{.*}} seq_cst, align 4
 // CHECK: call{{.*}} @__kmpc_flush(
 // CHECK: store i32
 #pragma omp atomic seq_cst, read
   iv = uix;
-// CHECK: load atomic i32, i32*
+// CHECK: load atomic i32, i32* {{.*}} monotonic, align 4
 // CHECK: store i32
 #pragma omp atomic read
   uiv = ix;
@@ -194,15 +195,15 @@ int main() {
 // CHECK: store i64
 #pragma omp atomic read
   lv = cix;
-// CHECK: load atomic i32, i32*
+// CHECK: load atomic i32, i32* {{.*}} monotonic, align 4
 // CHECK: store i64
 #pragma omp atomic read
   ulv = fx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store i64
 #pragma omp atomic read
   llv = dx;
-// CHECK: load atomic i128, i128*
+// CHECK: load atomic i128, i128* {{.*}} monotonic, align 16
 // CHECK: store i64
 #pragma omp atomic read
   ullv = ldx;
@@ -210,30 +211,30 @@ int main() {
 // CHECK: store float
 #pragma omp atomic read
   fv = cix;
-// CHECK: load atomic i16, i16*
+// CHECK: load atomic i16, i16* {{.*}} monotonic, align 2
 // CHECK: store double
 #pragma omp atomic read
   dv = sx;
-// CHECK: load atomic i8, i8*
+// CHECK: load atomic i8, i8* {{.*}} monotonic, align 1
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bx;
-// CHECK: load atomic i8, i8*
+// CHECK: load atomic i8, i8* {{.*}} monotonic, align 1
 // CHECK: store i32
 // CHECK: store i32
 #pragma omp atomic read
   civ = bx;
-// CHECK: load atomic i16, i16*
+// CHECK: load atomic i16, i16* {{.*}} monotonic, align 2
 // CHECK: store float
 // CHECK: store float
 #pragma omp atomic read
   cfv = usx;
-// CHECK: load atomic i64, i64*
+// CHECK: load atomic i64, i64* {{.*}} monotonic, align 8
 // CHECK: store double
 // CHECK: store double
 #pragma omp atomic read
   cdv = llx;
-// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* @{{.+}} to i128*) monotonic
+// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* @{{.+}} to i128*) monotonic, align 16
 // CHECK: [[I128PTR:%.+]] = bitcast <4 x i32>* [[LDTEMP:%.+]] to i128*
 // CHECK: store i128 [[I128VAL]], i128* [[I128PTR]]
 // CHECK: [[LD:%.+]] = load <4 x i32>, <4 x i32>* [[LDTEMP]]
@@ -241,7 +242,7 @@ int main() {
 // CHECK: store i8
 #pragma omp atomic read
   bv = int4x[0];
-// CHECK: [[LD:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%{{.+}}* @{{.+}} to i8*), i64 4) to i32*) monotonic
+// CHECK: [[LD:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%{{.+}}* @{{.+}} to i8*), i64 4) to i32*) monotonic, align 4
 // CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i32, i32* [[LDTEMP]]
 // CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1
@@ -257,21 +258,21 @@ int main() {
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx_packed.a;
-// CHECK: [[LD:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @bfx2, i32 0, i32 0) monotonic
+// CHECK: [[LD:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @bfx2, i32 0, i32 0) monotonic, align 4
 // CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i32, i32* [[LDTEMP]]
 // CHECK: ashr i32 [[LD]], 31
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx2.a;
-// CHECK: [[LD:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @bfx2_packed to i8*), i64 3) monotonic
+// CHECK: [[LD:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @bfx2_packed to i8*), i64 3) monotonic, align 1
 // CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i8, i8* [[LDTEMP]]
 // CHECK: ashr i8 [[LD]], 7
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx2_packed.a;
-// CHECK: [[LD:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @bfx3, i32 0, i32 0) monotonic
+// CHECK: [[LD:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @bfx3, i32 0, i32 0) monotonic, align 4
 // CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i32, i32* [[LDTEMP]]
 // CHECK: [[SHL:%.+]] = shl i32 [[LD]], 7
@@ -288,7 +289,7 @@ int main() {
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx3_packed.a;
-// CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @bfx4 to i64*) monotonic
+// CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @bfx4 to i64*) monotonic, align 8
 // CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i64, i64* [[LDTEMP]]
 // CHECK: [[SHL:%.+]] = shl i64 [[LD]], 47
@@ -297,7 +298,7 @@ int main() {
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx4.a;
-// CHECK: [[LD:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) monotonic
+// CHECK: [[LD:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) monotonic, align 1
 // CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i8, i8* [[LDTEMP]]
 // CHECK: [[SHL:%.+]] = shl i8 [[LD]], 7
@@ -306,7 +307,7 @@ int main() {
 // CHECK: store x86_fp80
 #pragma omp atomic relaxed read
   ldv = bfx4_packed.a;
-// CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @bfx4 to i64*) monotonic
+// CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @bfx4 to i64*) monotonic, align 8
 // CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i64, i64* [[LDTEMP]]
 // CHECK: [[SHL:%.+]] = shl i64 [[LD]], 40
@@ -314,7 +315,7 @@ int main() {
 // CHECK: store x86_fp80
 #pragma omp atomic read relaxed
   ldv = bfx4.b;
-// CHECK: [[LD:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) acquire
+// CHECK: [[LD:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) acquire, align 1
 // CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]]
 // CHECK: [[LD:%.+]] = load i8, i8* [[LDTEMP]]
 // CHECK: [[ASHR:%.+]] = ashr i8 [[LD]], 1
@@ -323,7 +324,7 @@ int main() {
 // CHECK: store x86_fp80
 #pragma omp atomic read acquire
   ldv = bfx4_packed.b;
-// CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (<2 x float>* @{{.+}} to i64*) monotonic
+// CHECK: [[LD:%.+]] = load atomic i64, i64* bitcast (<2 x float>* @{{.+}} to i64*) monotonic, align 8
 // CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i64*
 // CHECK: store i64 [[LD]], i64* [[BITCAST]]
 // CHECK: [[LD:%.+]] = load <2 x float>, <2 x float>* [[LDTEMP]]

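(Illustrative note: the read tests above reduce to the sketch below, with hypothetical names. An atomic read lowers to a single ordered atomic load, so the rewritten CHECK lines can match the full tail of the instruction, ordering plus alignment, e.g. "load atomic i32, i32* @ix monotonic, align 4".)

    int ix, iv;

    void read_ix(void) {
    #pragma omp atomic read
      iv = ix; /* lowers to: load atomic i32 ... monotonic, align 4; then a plain store */
    }
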
diff --git a/clang/test/OpenMP/atomic_update_codegen.cpp b/clang/test/OpenMP/atomic_update_codegen.cpp
index f8c45c5959be..fc0673617997 100644
--- a/clang/test/OpenMP/atomic_update_codegen.cpp
+++ b/clang/test/OpenMP/atomic_update_codegen.cpp
@@ -85,21 +85,21 @@ int main() {
 // CHECK-NOT: atomicrmw
 #pragma omp atomic
   ++dv;
-// CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic
+// CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic, align 1
 #pragma omp atomic
   bx++;
-// CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic
+// CHECK: atomicrmw add i8* @{{.+}}, i8 1 monotonic, align 1
 #pragma omp atomic update
   ++cx;
-// CHECK: atomicrmw sub i8* @{{.+}}, i8 1 monotonic
+// CHECK: atomicrmw sub i8* @{{.+}}, i8 1 monotonic, align 1
 #pragma omp atomic
   ucx--;
-// CHECK: atomicrmw sub i16* @{{.+}}, i16 1 monotonic
+// CHECK: atomicrmw sub i16* @{{.+}}, i16 1 monotonic, align 2
 #pragma omp atomic update
   --sx;
 // CHECK: [[USV:%.+]] = load i16, i16* @{{.+}},
 // CHECK: [[EXPR:%.+]] = zext i16 [[USV]] to i32
-// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic, align 2
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i16 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -108,7 +108,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = trunc i32 [[ADD]] to i16
 // CHECK: store i16 [[DESIRED]], i16* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i16, i16* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic, align 2
 // CHECK: [[OLD_X]] = extractvalue { i16, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i16, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -116,14 +116,14 @@ int main() {
 #pragma omp atomic
   usx += usv;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED:%.+]] = mul nsw i32 [[EXPECTED]], [[EXPR]]
 // CHECK: store i32 [[DESIRED]], i32* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -131,18 +131,18 @@ int main() {
 #pragma omp atomic update
   ix *= iv;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: atomicrmw sub i32* @{{.+}}, i32 [[EXPR]] monotonic
+// CHECK: atomicrmw sub i32* @{{.+}}, i32 [[EXPR]] monotonic, align 4
 #pragma omp atomic
   uix -= uiv;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED:%.+]] = shl i32 [[EXPECTED]], [[EXPR]]
 // CHECK: store i32 [[DESIRED]], i32* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -150,14 +150,14 @@ int main() {
 #pragma omp atomic update
   ix <<= iv;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i32, i32* [[X_ADDR:@.+]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED:%.+]] = lshr i32 [[EXPECTED]], [[EXPR]]
 // CHECK: store i32 [[DESIRED]], i32* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[X_ADDR]], i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[OLD_X]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -165,14 +165,14 @@ int main() {
 #pragma omp atomic
   uix >>= uiv;
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i64, i64* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64* [[X_ADDR:@.+]] monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED:%.+]] = sdiv i64 [[EXPECTED]], [[EXPR]]
 // CHECK: store i64 [[DESIRED]], i64* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* [[X_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* [[X_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -180,19 +180,19 @@ int main() {
 #pragma omp atomic update
   lx /= lv;
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 #pragma omp atomic
   ulx &= ulv;
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: atomicrmw xor i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: atomicrmw xor i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 #pragma omp atomic update
   llx ^= llv;
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
-// CHECK: atomicrmw or i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: atomicrmw or i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 #pragma omp atomic
   ullx |= ullv;
 // CHECK: [[EXPR:%.+]] = load float, float* @{{.+}},
-// CHECK: [[OLD:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic
+// CHECK: [[OLD:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
@@ -201,7 +201,7 @@ int main() {
 // CHECK: [[ADD:%.+]] = fadd float [[OLD]], [[EXPR]]
 // CHECK: store float [[ADD]], float* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[BITCAST]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[PREV:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -209,7 +209,7 @@ int main() {
 #pragma omp atomic update
   fx = fx + fv;
 // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
-// CHECK: [[OLD:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic
+// CHECK: [[OLD:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
@@ -218,7 +218,7 @@ int main() {
 // CHECK: [[SUB:%.+]] = fsub double [[EXPR]], [[OLD]]
 // CHECK: store double [[SUB]], double* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[BITCAST]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[PREV:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -226,7 +226,7 @@ int main() {
 #pragma omp atomic
   dx = dv - dx;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}},
-// CHECK: [[OLD:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic
+// CHECK: [[OLD:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic, align 16
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i128 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
@@ -238,7 +238,7 @@ int main() {
 // CHECK: [[MUL:%.+]] = fmul x86_fp80 [[OLD]], [[EXPR]]
 // CHECK: store x86_fp80 [[MUL]], x86_fp80* [[TEMP]]
 // CHECK: [[DESIRED:%.+]] = load i128, i128* [[BITCAST]]
-// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic, align 16
 // CHECK: [[PREV:%.+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -315,12 +315,12 @@ int main() {
 // CHECK: [[BV:%.+]] = load i8, i8* @{{.+}}
 // CHECK: [[BOOL:%.+]] = trunc i8 [[BV]] to i1
 // CHECK: [[EXPR:%.+]] = zext i1 [[BOOL]] to i64
-// CHECK: atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic
+// CHECK: atomicrmw and i64* @{{.+}}, i64 [[EXPR]] monotonic, align 8
 #pragma omp atomic update
   ulx = ulx & bv;
 // CHECK: [[CV:%.+]]  = load i8, i8* @{{.+}}, align 1
 // CHECK: [[EXPR:%.+]] = sext i8 [[CV]] to i32
-// CHECK: [[BX:%.+]] = load atomic i8, i8* [[BX_ADDR:@.+]] monotonic
+// CHECK: [[BX:%.+]] = load atomic i8, i8* [[BX_ADDR:@.+]] monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[BX]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -331,7 +331,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = zext i1 [[CAST]] to i8
 // CHECK: store i8 [[DESIRED]], i8* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[BX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[BX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic, align 1
 // CHECK: [[OLD_X:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -340,7 +340,7 @@ int main() {
   bx = cv & bx;
 // CHECK: [[UCV:%.+]]  = load i8, i8* @{{.+}},
 // CHECK: [[EXPR:%.+]] = zext i8 [[UCV]] to i32
-// CHECK: [[X:%.+]] = load atomic i8, i8* [[CX_ADDR:@.+]] seq_cst
+// CHECK: [[X:%.+]] = load atomic i8, i8* [[CX_ADDR:@.+]] seq_cst, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -349,7 +349,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = trunc i32 [[ASHR]] to i8
 // CHECK: store i8 [[DESIRED]], i8* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[CX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] seq_cst seq_cst
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[CX_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] seq_cst seq_cst, align 1
 // CHECK: [[OLD_X:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -359,7 +359,7 @@ int main() {
   cx = cx >> ucv;
 // CHECK: [[SV:%.+]]  = load i16, i16* @{{.+}},
 // CHECK: [[EXPR:%.+]] = sext i16 [[SV]] to i32
-// CHECK: [[X:%.+]] = load atomic i64, i64* [[ULX_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64* [[ULX_ADDR:@.+]] monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -368,7 +368,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = sext i32 [[SHL]] to i64
 // CHECK: store i64 [[DESIRED]], i64* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* [[ULX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* [[ULX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -377,14 +377,14 @@ int main() {
   ulx = sv << ulx;
 // CHECK: [[USV:%.+]]  = load i16, i16* @{{.+}},
 // CHECK: [[EXPR:%.+]] = zext i16 [[USV]] to i64
-// CHECK: [[X:%.+]] = load atomic i64, i64* [[LX_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i64, i64* [[LX_ADDR:@.+]] monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
 // CHECK: [[DESIRED:%.+]] = srem i64 [[EXPECTED]], [[EXPR]]
 // CHECK: store i64 [[DESIRED]], i64* [[TEMP:%.+]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* [[LX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* [[LX_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[OLD_X:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -392,12 +392,12 @@ int main() {
 #pragma omp atomic
   lx = lx % usv;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}}
-// CHECK: atomicrmw or i32* @{{.+}}, i32 [[EXPR]] seq_cst
+// CHECK: atomicrmw or i32* @{{.+}}, i32 [[EXPR]] seq_cst, align 4
 // CHECK: call{{.*}} @__kmpc_flush(
 #pragma omp atomic seq_cst, update
   uix = iv | uix;
 // CHECK: [[EXPR:%.+]] = load i32, i32* @{{.+}}
-// CHECK: atomicrmw and i32* @{{.+}}, i32 [[EXPR]] monotonic
+// CHECK: atomicrmw and i32* @{{.+}}, i32 [[EXPR]] monotonic, align 4
 #pragma omp atomic
   ix = ix & uiv;
 // CHECK: [[EXPR:%.+]] = load i64, i64* @{{.+}},
@@ -423,7 +423,7 @@ int main() {
   cix = lv + cix;
 // CHECK: [[ULV:%.+]] = load i64, i64* @{{.+}},
 // CHECK: [[EXPR:%.+]] = uitofp i64 [[ULV]] to float
-// CHECK: [[OLD:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic
+// CHECK: [[OLD:%.+]] = load atomic i32, i32*  bitcast (float* [[X_ADDR:@.+]] to i32*) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i32 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
@@ -432,7 +432,7 @@ int main() {
 // CHECK: [[MUL:%.+]] = fmul float [[OLD]], [[EXPR]]
 // CHECK: store float [[MUL]], float* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i32, i32* [[BITCAST]],
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (float* [[X_ADDR]] to i32*), i32 [[EXPECTED]], i32 [[DESIRED]] monotonic monotonic, align 4
 // CHECK: [[PREV:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -441,7 +441,7 @@ int main() {
   fx = fx * ulv;
 // CHECK: [[LLV:%.+]] = load i64, i64* @{{.+}},
 // CHECK: [[EXPR:%.+]] = sitofp i64 [[LLV]] to double
-// CHECK: [[OLD:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic
+// CHECK: [[OLD:%.+]] = load atomic i64, i64*  bitcast (double* [[X_ADDR:@.+]] to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i64 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
@@ -450,7 +450,7 @@ int main() {
 // CHECK: [[DIV:%.+]] = fdiv double [[OLD]], [[EXPR]]
 // CHECK: store double [[DIV]], double* [[TEMP]],
 // CHECK: [[DESIRED:%.+]] = load i64, i64* [[BITCAST]],
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (double* [[X_ADDR]] to i64*), i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic, align 8
 // CHECK: [[PREV:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -459,7 +459,7 @@ int main() {
   dx /= llv;
 // CHECK: [[ULLV:%.+]] = load i64, i64* @{{.+}},
 // CHECK: [[EXPR:%.+]] = uitofp i64 [[ULLV]] to x86_fp80
-// CHECK: [[OLD:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic
+// CHECK: [[OLD:%.+]] = load atomic i128, i128*  bitcast (x86_fp80* [[X_ADDR:@.+]] to i128*) monotonic, align 16
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i128 [ [[OLD]], %{{.+}} ], [ [[PREV:%.+]], %[[CONT]] ]
@@ -470,7 +470,7 @@ int main() {
 // CHECK: [[SUB:%.+]] = fsub x86_fp80 [[OLD]], [[EXPR]]
 // CHECK: store x86_fp80 [[SUB]], x86_fp80* [[TEMP1]]
 // CHECK: [[DESIRED:%.+]] = load i128, i128* [[BITCAST1]]
-// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (x86_fp80* [[X_ADDR]] to i128*), i128 [[EXPECTED]], i128 [[DESIRED]] monotonic monotonic, align 16
 // CHECK: [[PREV:%.+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -499,7 +499,7 @@ int main() {
 #pragma omp atomic update
   cix = fv / cix;
 // CHECK: [[EXPR:%.+]] = load double, double* @{{.+}},
-// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic
+// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic, align 2
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i16 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ]
@@ -509,7 +509,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = fptosi double [[ADD]] to i16
 // CHECK: store i16 [[DESIRED]], i16* [[TEMP:%.+]]
 // CHECK: [[DESIRED:%.+]] = load i16, i16* [[TEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic, align 2
 // CHECK: [[OLD_X]] = extractvalue { i16, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i16, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -517,7 +517,7 @@ int main() {
 #pragma omp atomic
   sx = sx + dv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}},
-// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic
+// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[XI8]], %{{.+}} ], [ [[OLD_XI8:%.+]], %[[CONT]] ]
@@ -529,7 +529,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = zext i1 [[BOOL_DESIRED]] to i8
 // CHECK: store i8 [[DESIRED]], i8* [[TEMP:%.+]]
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] release monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] release monotonic, align 1
 // CHECK: [[OLD_XI8:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -539,7 +539,7 @@ int main() {
   bx = ldv * bx;
 // CHECK: [[EXPR_RE:%.+]] = load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* [[CIV_ADDR:@.+]], i32 0, i32 0),
 // CHECK: [[EXPR_IM:%.+]] = load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* [[CIV_ADDR]], i32 0, i32 1),
-// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic
+// CHECK: [[XI8:%.+]] = load atomic i8, i8* [[X_ADDR:@.+]] monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[EXPECTED:%.+]] = phi i8 [ [[XI8]], %{{.+}} ], [ [[OLD_XI8:%.+]], %[[CONT]] ]
@@ -553,7 +553,7 @@ int main() {
 // CHECK: [[DESIRED:%.+]] = zext i1 [[BOOL_DESIRED]] to i8
 // CHECK: store i8 [[DESIRED]], i8* [[TEMP:%.+]]
 // CHECK: [[DESIRED:%.+]] = load i8, i8* [[TEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* [[X_ADDR]], i8 [[EXPECTED]], i8 [[DESIRED]] monotonic monotonic, align 1
 // CHECK: [[OLD_XI8:%.+]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]]
@@ -563,7 +563,7 @@ int main() {
 // CHECK: [[IDX:%.+]] = load i16, i16* @{{.+}}
 // CHECK: load i8, i8*
 // CHECK: [[VEC_ITEM_VAL:%.+]] = zext i1 %{{.+}} to i32
-// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) monotonic
+// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) monotonic, align 16
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_I128:%.+]] = phi i128 [ [[I128VAL]], %{{.+}} ], [ [[FAILED_I128_OLD_VAL:%.+]], %[[CONT]] ]
@@ -578,7 +578,7 @@ int main() {
 // CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <4 x i32> [[VEC_VAL]], i32 [[OR]], i16 [[IDX]]
 // CHECK: store <4 x i32> [[NEW_VEC_VAL]], <4 x i32>* [[TEMP]]
 // CHECK: [[NEW_I128:%.+]] = load i128, i128* [[BITCAST]]
-// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] monotonic monotonic, align 16
 // CHECK: [[FAILED_I128_OLD_VAL:%.+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -586,7 +586,7 @@ int main() {
 #pragma omp atomic update
   int4x[sv] |= bv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -604,7 +604,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -639,7 +639,7 @@ int main() {
 #pragma omp atomic update
   bfx_packed.a *= ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -657,7 +657,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -665,7 +665,7 @@ int main() {
 #pragma omp atomic
   bfx2.a -= ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -687,7 +687,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[BITCAST1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[BITCAST1]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -695,7 +695,7 @@ int main() {
 #pragma omp atomic update
   bfx2_packed.a = ldv / bfx2_packed.a;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -714,7 +714,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -753,7 +753,7 @@ int main() {
 #pragma omp atomic update
   bfx3_packed.a += ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -774,7 +774,7 @@ int main() {
 // CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i64 %{{.+}}, i64* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic, align 8
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -782,7 +782,7 @@ int main() {
 #pragma omp atomic
   bfx4.a = bfx4.a * ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %{{.+}} ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -804,7 +804,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[BITCAST1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[BITCAST1]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -812,7 +812,7 @@ int main() {
 #pragma omp atomic relaxed update
   bfx4_packed.a -= ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -831,7 +831,7 @@ int main() {
 // CHECK: [[VAL:%.+]] = or i64 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i64 [[VAL]], i64* [[TEMP1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[TEMP1]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic, align 8
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -839,7 +839,7 @@ int main() {
 #pragma omp atomic
   bfx4.b /= ldv;
 // CHECK: [[EXPR:%.+]] = load x86_fp80, x86_fp80* @{{.+}}
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -861,7 +861,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[BITCAST1]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[BITCAST1]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -870,7 +870,7 @@ int main() {
   bfx4_packed.b += ldv;
 // CHECK: load i64, i64*
 // CHECK: [[EXPR:%.+]] = uitofp i64 %{{.+}} to float
-// CHECK: [[I64VAL:%.+]] = load atomic i64, i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) monotonic
+// CHECK: [[I64VAL:%.+]] = load atomic i64, i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_I64:%.+]] = phi i64 [ [[I64VAL]], %{{.+}} ], [ [[FAILED_I64_OLD_VAL:%.+]], %[[CONT]] ]
@@ -885,7 +885,7 @@ int main() {
 // CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <2 x float> [[VEC_VAL]], float [[VEC_ITEM_VAL]], i64 0
 // CHECK: store <2 x float> [[NEW_VEC_VAL]], <2 x float>* [[TEMP]]
 // CHECK: [[NEW_I64:%.+]] = load i64, i64* [[BITCAST]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] monotonic monotonic, align 8
 // CHECK: [[FAILED_I64_OLD_VAL:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
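
(A note on the cmpxchg patterns above: the instruction carries two orderings,
success first and failure second, and the newly checked alignment trails both.
A sketch line with illustrative operand names:

  %res = cmpxchg i8* %ptr, i8 %expected, i8 %desired release monotonic, align 1

asserts release ordering on success, monotonic ordering on failure, and a
1-byte-aligned access.)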

diff --git a/clang/test/OpenMP/atomic_write_codegen.c b/clang/test/OpenMP/atomic_write_codegen.c
index f3b3acfbee3f..6b1497bb9022 100644
--- a/clang/test/OpenMP/atomic_write_codegen.c
+++ b/clang/test/OpenMP/atomic_write_codegen.c
@@ -83,61 +83,61 @@ float2 float2x;
 register int rix __asm__("esp");
 
 int main() {
-// CHECK: store atomic i32 1, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @civ, i32 0, i32 1) monotonic,
+// CHECK: store atomic i32 1, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @civ, i32 0, i32 1) monotonic, align 4
 #pragma omp atomic write
  __imag(civ) = 1;
 // CHECK: load i8, i8*
-// CHECK: store atomic i8{{.*}}monotonic
+// CHECK: store atomic i8 {{.*}} monotonic, align 1
 #pragma omp atomic write
   bx = bv;
 // CHECK: load i8, i8*
-// CHECK: store atomic i8{{.*}}release
+// CHECK: store atomic i8 {{.*}} release, align 1
 #pragma omp atomic write release
   cx = cv;
 // CHECK: load i8, i8*
-// CHECK: store atomic i8
+// CHECK: store atomic i8 {{.*}} monotonic, align 1
 #pragma omp atomic write
   ucx = ucv;
 // CHECK: load i16, i16*
-// CHECK: store atomic i16
+// CHECK: store atomic i16 {{.*}} monotonic, align 2
 #pragma omp atomic write
   sx = sv;
 // CHECK: load i16, i16*
-// CHECK: store atomic i16
+// CHECK: store atomic i16 {{.*}} monotonic, align 2
 #pragma omp atomic write
   usx = usv;
 // CHECK: load i32, i32*
-// CHECK: store atomic i32
+// CHECK: store atomic i32 {{.*}} monotonic, align 4
 #pragma omp atomic write
   ix = iv;
 // CHECK: load i32, i32*
-// CHECK: store atomic i32
+// CHECK: store atomic i32 {{.*}} monotonic, align 4
 #pragma omp atomic write
   uix = uiv;
 // CHECK: load i64, i64*
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   lx = lv;
 // CHECK: load i64, i64*
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   ulx = ulv;
 // CHECK: load i64, i64*
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   llx = llv;
 // CHECK: load i64, i64*
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   ullx = ullv;
 // CHECK: load float, float*
 // CHECK: bitcast float {{.*}} to i32
-// CHECK: store atomic i32 {{.*}}, i32* bitcast (float*
+// CHECK: store atomic i32 {{.*}}, i32* bitcast (float* {{.*}} monotonic, align 4
 #pragma omp atomic write
   fx = fv;
 // CHECK: load double, double*
 // CHECK: bitcast double {{.*}} to i64
-// CHECK: store atomic i64 {{.*}}, i64* bitcast (double*
+// CHECK: store atomic i64 {{.*}}, i64* bitcast (double* {{.*}} monotonic, align 8
 #pragma omp atomic write
   dx = dv;
 // CHECK: [[LD:%.+]] = load x86_fp80, x86_fp80*
@@ -146,7 +146,7 @@ int main() {
 // CHECK: store x86_fp80 [[LD]], x86_fp80* [[LDTEMP]]
 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
 // CHECK: [[LD:%.+]] = load i128, i128* [[BITCAST]]
-// CHECK: store atomic i128 [[LD]], i128* bitcast (x86_fp80*
+// CHECK: store atomic i128 [[LD]], i128* bitcast (x86_fp80* {{.*}} monotonic, align 16
 #pragma omp atomic write
   ldx = ldv;
 // CHECK: [[REAL_VAL:%.+]] = load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @{{.*}}, i32 0, i32 0)
@@ -181,33 +181,33 @@ int main() {
 #pragma omp atomic seq_cst write
   cdx = cdv;
 // CHECK: load i8, i8*
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   ulx = bv;
 // CHECK: load i8, i8*
-// CHECK: store atomic i8
+// CHECK: store atomic i8 {{.*}} monotonic, align 1
 #pragma omp atomic write
   bx = cv;
 // CHECK: load i8, i8*
-// CHECK: store atomic i8{{.*}}seq_cst
+// CHECK: store atomic i8 {{.*}} seq_cst, align 1
 // CHECK: call{{.*}} @__kmpc_flush(
 #pragma omp atomic write, seq_cst
   cx = ucv;
 // CHECK: load i16, i16*
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   ulx = sv;
 // CHECK: load i16, i16*
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   lx = usv;
 // CHECK: load i32, i32*
-// CHECK: store atomic i32
+// CHECK: store atomic i32 {{.*}} seq_cst, align 4
 // CHECK: call{{.*}} @__kmpc_flush(
 #pragma omp atomic seq_cst, write
   uix = iv;
 // CHECK: load i32, i32*
-// CHECK: store atomic i32
+// CHECK: store atomic i32 {{.*}} monotonic, align 4
 #pragma omp atomic write
   ix = uiv;
 // CHECK: load i64, i64*
@@ -221,11 +221,11 @@ int main() {
 #pragma omp atomic write
   cix = lv;
 // CHECK: load i64, i64*
-// CHECK: store atomic i32 %{{.+}}, i32* bitcast (float*
+// CHECK: store atomic i32 %{{.+}}, i32* bitcast (float* {{.*}} monotonic, align 4
 #pragma omp atomic write
   fx = ulv;
 // CHECK: load i64, i64*
-// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double*
+// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double* {{.*}} monotonic, align 8
 #pragma omp atomic write
   dx = llv;
 // CHECK: load i64, i64*
@@ -235,7 +235,7 @@ int main() {
 // CHECK: store x86_fp80 [[VAL]], x86_fp80* [[TEMP]]
 // CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP]] to i128*
 // CHECK: [[VAL:%.+]] = load i128, i128* [[BITCAST]]
-// CHECK: store atomic i128 [[VAL]], i128* bitcast (x86_fp80*
+// CHECK: store atomic i128 [[VAL]], i128* bitcast (x86_fp80* {{.*}} monotonic, align 16
 #pragma omp atomic write
   ldx = ullv;
 // CHECK: load float, float*
@@ -249,11 +249,11 @@ int main() {
 #pragma omp atomic write
   cix = fv;
 // CHECK: load double, double*
-// CHECK: store atomic i16
+// CHECK: store atomic i16 {{.*}} monotonic, align 2
 #pragma omp atomic write
   sx = dv;
 // CHECK: load x86_fp80, x86_fp80*
-// CHECK: store atomic i8
+// CHECK: store atomic i8 {{.*}} monotonic, align 1
 #pragma omp atomic write
   bx = ldv;
 // CHECK: load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @{{.+}}, i32 0, i32 0)
@@ -261,21 +261,21 @@ int main() {
 // CHECK: icmp ne i32 %{{.+}}, 0
 // CHECK: icmp ne i32 %{{.+}}, 0
 // CHECK: or i1
-// CHECK: store atomic i8
+// CHECK: store atomic i8 {{.*}} monotonic, align 1
 #pragma omp atomic write
   bx = civ;
 // CHECK: load float, float* getelementptr inbounds ({ float, float }, { float, float }* @{{.*}}, i32 0, i32 0)
-// CHECK: store atomic i16
+// CHECK: store atomic i16 {{.*}} monotonic, align 2
 #pragma omp atomic write
   usx = cfv;
 // CHECK: load double, double* getelementptr inbounds ({ double, double }, { double, double }* @{{.+}}, i32 0, i32 0)
-// CHECK: store atomic i64
+// CHECK: store atomic i64 {{.*}} monotonic, align 8
 #pragma omp atomic write
   llx = cdv;
 // CHECK-DAG: [[IDX:%.+]] = load i16, i16* @{{.+}}
 // CHECK-DAG: load i8, i8*
 // CHECK-DAG: [[VEC_ITEM_VAL:%.+]] = zext i1 %{{.+}} to i32
-// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) monotonic
+// CHECK: [[I128VAL:%.+]] = load atomic i128, i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) monotonic, align 16
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_I128:%.+]] = phi i128 [ [[I128VAL]], %{{.+}} ], [ [[FAILED_I128_OLD_VAL:%.+]], %[[CONT]] ]
@@ -285,7 +285,7 @@ int main() {
 // CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <4 x i32> [[VEC_VAL]], i32 [[VEC_ITEM_VAL]], i16 [[IDX]]
 // CHECK: store <4 x i32> [[NEW_VEC_VAL]], <4 x i32>* [[LDTEMP]]
 // CHECK: [[NEW_I128:%.+]] = load i128, i128* [[BITCAST]]
-// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] monotonic monotonic, align 16
 // CHECK: [[FAILED_I128_OLD_VAL:%.+]] = extractvalue { i128, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -294,7 +294,7 @@ int main() {
   int4x[sv] = bv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -303,7 +303,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8, i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -332,7 +332,7 @@ int main() {
   bfx_packed.a = ldv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -342,7 +342,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2, %struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -351,7 +351,7 @@ int main() {
   bfx2.a = ldv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -362,7 +362,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8, i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -371,7 +371,7 @@ int main() {
   bfx2_packed.a = ldv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i32, i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0) monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -381,7 +381,7 @@ int main() {
 // CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i32, i32* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3, %struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] monotonic monotonic, align 4
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -412,7 +412,7 @@ int main() {
   bfx3_packed.a = ldv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -423,7 +423,7 @@ int main() {
 // CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic, align 8
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -432,7 +432,7 @@ int main() {
   bfx4.a = ldv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -442,7 +442,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -451,7 +451,7 @@ int main() {
   bfx4_packed.a = ldv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i64
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i64, i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -461,7 +461,7 @@ int main() {
 // CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i64, i64* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] monotonic monotonic, align 8
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -470,7 +470,7 @@ int main() {
   bfx4.b = ldv;
 // CHECK: load x86_fp80, x86_fp80* @{{.+}}
 // CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i64
-// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic
+// CHECK: [[PREV_VALUE:%.+]] = load atomic i8, i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) monotonic, align 1
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ]
@@ -481,7 +481,7 @@ int main() {
 // CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]]
 // CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]]
 // CHECK: [[NEW_BF_VALUE:%.+]] = load i8, i8* [[LDTEMP]]
-// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed, %struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] monotonic monotonic, align 1
 // CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -490,7 +490,7 @@ int main() {
   bfx4_packed.b = ldv;
 // CHECK: load i64, i64*
 // CHECK: [[VEC_ITEM_VAL:%.+]] = uitofp i64 %{{.+}} to float
-// CHECK: [[I64VAL:%.+]] = load atomic i64, i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) monotonic
+// CHECK: [[I64VAL:%.+]] = load atomic i64, i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) monotonic, align 8
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[OLD_I64:%.+]] = phi i64 [ [[I64VAL]], %{{.+}} ], [ [[FAILED_I64_OLD_VAL:%.+]], %[[CONT]] ]
@@ -500,7 +500,7 @@ int main() {
 // CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <2 x float> [[VEC_VAL]], float [[VEC_ITEM_VAL]], i64 0
 // CHECK: store <2 x float> [[NEW_VEC_VAL]], <2 x float>* [[LDTEMP]]
 // CHECK: [[NEW_I64:%.+]] = load i64, i64* [[BITCAST]]
-// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] monotonic monotonic, align 8
 // CHECK: [[FAILED_I64_OLD_VAL:%.+]] = extractvalue { i64, i1 } [[RES]], 0
 // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1
 // CHECK: br i1 [[FAIL_SUCCESS]], label %[[EXIT:.+]], label %[[CONT]]
@@ -510,7 +510,7 @@ int main() {
 // CHECK: call i32 @llvm.read_register.i32(
 // CHECK: sitofp i32 %{{.+}} to double
 // CHECK: bitcast double %{{.+}} to i64
-// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double* @{{.+}} to i64*) seq_cst
+// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double* @{{.+}} to i64*) seq_cst, align 8
 // CHECK: call{{.*}} @__kmpc_flush(
 #pragma omp atomic write seq_cst
   dv = rix;
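
(For the shape these updated write patterns match, here is a minimal sketch of
a standalone test; it is not part of this commit, and the RUN line and symbol
names are illustrative:

  // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
  _Atomic int g;
  void set_g(int v) {
    // CHECK: store atomic i32 %{{.+}}, i32* @g seq_cst, align 4
    __c11_atomic_store(&g, v, __ATOMIC_SEQ_CST);
  }

Because the ordering and the align attribute sit at the very end of the
printed instruction, a CHECK line that stops early silently ignores them,
which is precisely what these test updates correct.)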

diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp
index 8878ad29f6a2..609d10163994 100644
--- a/clang/test/OpenMP/for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen.cpp
@@ -111,7 +111,7 @@ int main() {
     // LAMBDA: [[CASE2]]
     // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE_ADDR]]
     // LAMBDA: fadd double
-    // LAMBDA: cmpxchg i64*
+    // LAMBDA: cmpxchg i64* {{.*}}, align 8
     // LAMBDA: call void @__kmpc_end_reduce(
     // LAMBDA: br label %[[REDUCTION_DONE]]
     // LAMBDA: [[REDUCTION_DONE]]
@@ -173,7 +173,7 @@ int main() {
     // BLOCKS: [[CASE2]]
     // BLOCKS: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE_ADDR]]
     // BLOCKS: fadd double
-    // BLOCKS: cmpxchg i64*
+    // BLOCKS: cmpxchg i64* {{.*}}, align 8
     // BLOCKS: call void @__kmpc_end_reduce(
     // BLOCKS: br label %[[REDUCTION_DONE]]
     // BLOCKS: [[REDUCTION_DONE]]
@@ -390,14 +390,14 @@ int main() {
 // t_var += t_var_reduction;
 // CHECK: load float, float* [[T_VAR_PRIV]]
 // CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
-// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR_REF_INT]] monotonic,
+// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR_REF_INT]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %[[CONT]] ]
 // CHECK: fadd float
 // CHECK: [[UP_INT:%.+]] = load i32, i32*
 // CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic, align 4
 // CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
@@ -432,7 +432,7 @@ int main() {
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: load float, float* [[T_VAR1_PRIV]]
 // CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
-// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR1_REF_INT]] monotonic,
+// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR1_REF_INT]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %{{.+}} ]
@@ -441,7 +441,7 @@ int main() {
 // CHECK: phi float
 // CHECK: [[UP_INT:%.+]] = load i32
 // CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR1_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR1_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic, align 4
 // CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
@@ -649,7 +649,7 @@ int main() {
 // CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[LB1_0]], [[END]]
 // CHECK: br i1 [[ISEMPTY]],
 // CHECK: phi i32*
-// CHECK: atomicrmw add i32* %{{.+}}, i32 %{{.+}} monotonic
+// CHECK: atomicrmw add i32* %{{.+}}, i32 %{{.+}} monotonic, align 4
 // CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
 
@@ -843,7 +843,7 @@ int main() {
 // CHECK: [[ISEMPTY:%.+]] = icmp eq i32* [[LB1_0]], [[END]]
 // CHECK: br i1 [[ISEMPTY]],
 // CHECK: phi i32*
-// CHECK: atomicrmw add i32* %{{.+}}, i32 %{{.+}} monotonic
+// CHECK: atomicrmw add i32* %{{.+}}, i32 %{{.+}} monotonic, align 4
 // CHECK: [[DONE:%.+]] = icmp eq i32* %{{.+}}, [[END]]
 // CHECK: br i1 [[DONE]],
 
@@ -1338,7 +1338,7 @@ int main() {
 // case 2:
 // t_var += t_var_reduction;
 // CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]]
-// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic
+// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic, align 4
 
 // var = var.operator &(var_reduction);
 // CHECK: call void @__kmpc_critical(
@@ -1368,7 +1368,7 @@ int main() {
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]]
-// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic
+// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic, align 4
 
 // break;
 // CHECK: br label %[[RED_DONE]]
@@ -1481,4 +1481,3 @@ int main() {
 // CHECK: ret void
 
 #endif
-

diff --git a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp
index f58131cbc534..c0ccaf8ac8b8 100644
--- a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp
+++ b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp
@@ -45,7 +45,7 @@ int main() {
 // CHECK: br label %[[DONE]]
 // CHECK: [[DONE]]:
 // CHECK: call void @__kmpc_end_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}})
-// CHECK: atomicrmw add i32*
+// CHECK: atomicrmw add i32* {{.*}}, align 4
 // CHECK: call void @__kmpc_critical(%struct.ident_t* @{{.+}}, i32 %{{.+}}, [8 x i32]* @{{.+}})
 // CHECK: [[LAST_IV_VAL:%.+]] = load i32, i32* [[LAST_IV:@.+]],
 // CHECK: [[RES:%.+]] = icmp sle i32 [[LAST_IV_VAL]], [[IV:%.+]]
@@ -92,10 +92,10 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define internal void [[OUTLINED2]](i32* {{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}})
-// CHECK: atomicrmw add i32* [[A_SHARED:%.+]], i32 %{{.+}} monotonic
+// CHECK: atomicrmw add i32* [[A_SHARED:%.+]], i32 %{{.+}} monotonic, align 4
 // CHECK: [[BASE:%.+]] = bitcast i32* [[A_SHARED]] to [[STRUCT:%struct[.].+]]*
 // CHECK: [[FIRED:%.+]] = getelementptr inbounds [[STRUCT]], [[STRUCT]]* [[BASE]], i{{.+}} 0, i{{.+}} 1
-// CHECK: store atomic volatile i8 1, i8* [[FIRED]] unordered,
+// CHECK: store atomic volatile i8 1, i8* [[FIRED]] unordered, align 1
 // CHECK: ret void
 
 #endif // HEADER

diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp
index dc5b60547674..b1c48187ec8d 100644
--- a/clang/test/OpenMP/parallel_master_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_codegen.cpp
@@ -492,7 +492,7 @@ void parallel_master_reduction() {
 
 // case 2:
 // CK6:       [[ELEVEN:%.+]] = load i32, i32* [[G_1]]
-// CK6:       [[TWELVE:%.+]] = atomicrmw add i32* [[ZERO]], i32 [[ELEVEN]] monotonic
+// CK6:       [[TWELVE:%.+]] = atomicrmw add i32* [[ZERO]], i32 [[ELEVEN]] monotonic, align 4
 
 // CK6:       define internal void [[RED_FUNC]](i8* [[ZERO]], i8* [[ONE]])
 // CK6:       ret void

diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp
index 3f5b7ebd0027..ed5a85b636e7 100644
--- a/clang/test/OpenMP/parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp
@@ -198,7 +198,7 @@ int main() {
     // LAMBDA: br label %[[REDUCTION_DONE]]
     // LAMBDA: [[CASE2]]
     // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
-    // LAMBDA: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic
+    // LAMBDA: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic, align 4
     // LAMBDA: br label %[[REDUCTION_DONE]]
     // LAMBDA: [[REDUCTION_DONE]]
     // LAMBDA: ret void
@@ -255,7 +255,7 @@ int main() {
     // BLOCKS: br label %[[REDUCTION_DONE]]
     // BLOCKS: [[CASE2]]
     // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
-    // BLOCKS: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic
+    // BLOCKS: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic, align 4
     // BLOCKS: br label %[[REDUCTION_DONE]]
     // BLOCKS: [[REDUCTION_DONE]]
     // BLOCKS: ret void
@@ -450,14 +450,14 @@ int main() {
 // t_var += t_var_reduction;
 // CHECK: load float, float* [[T_VAR_PRIV]]
 // CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
-// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR_REF_INT]] monotonic,
+// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR_REF_INT]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %[[CONT]] ]
 // CHECK: fadd float
 // CHECK: [[UP_INT:%.+]] = load i32
 // CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic, align 4
 // CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
@@ -492,7 +492,7 @@ int main() {
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: load float, float* [[T_VAR1_PRIV]]
 // CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
-// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR1_REF_INT]] monotonic,
+// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR1_REF_INT]] monotonic, align 4
 // CHECK: br label %[[CONT:.+]]
 // CHECK: [[CONT]]
 // CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %{{.+}} ]
@@ -501,7 +501,7 @@ int main() {
 // CHECK: [[UP:%.+]] = phi float
 // CHECK: [[UP_INT:%.+]] = load i32
 // CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
-// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR1_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
+// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR1_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic, align 4
 // CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
 // CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
 // CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
@@ -771,7 +771,7 @@ int main() {
 // case 2:
 // t_var += t_var_reduction;
 // CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]]
-// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic
+// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic, align 4
 
 // var = var.operator &(var_reduction);
 // CHECK: call void @__kmpc_critical(
@@ -801,7 +801,7 @@ int main() {
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]]
-// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic
+// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic, align 4
 
 // break;
 // CHECK: br label %[[RED_DONE]]
@@ -892,4 +892,3 @@ int main() {
 // CHECK: ret void
 
 #endif
-

diff --git a/clang/test/OpenMP/requires_acq_rel_codegen.cpp b/clang/test/OpenMP/requires_acq_rel_codegen.cpp
index b8ba01b0cafe..911c49b62471 100644
--- a/clang/test/OpenMP/requires_acq_rel_codegen.cpp
+++ b/clang/test/OpenMP/requires_acq_rel_codegen.cpp
@@ -16,19 +16,19 @@
 // CHECK-LABEL: foo
 void foo() {
   int a = 0, b = 0;
-// CHECK: load atomic i32,{{.*}}acquire
+// CHECK: load atomic i32, {{.*}} acquire, align 4
 #pragma omp atomic read
   a = b;
-// CHECK: store atomic i32{{.*}}release
+// CHECK: store atomic i32 {{.*}} release, align 4
 #pragma omp atomic write
   a = b;
-// CHECK: atomicrmw add i32{{.*}}release
+// CHECK: atomicrmw add i32* {{.*}} release, align 4
 #pragma omp atomic
   a += 1;
-// CHECK: atomicrmw add i32{{.*}}release
+// CHECK: atomicrmw add i32* {{.*}} release, align 4
 #pragma omp atomic update
   a += 1;
-// CHECK: atomicrmw add i32{{.*}}acq_rel
+// CHECK: atomicrmw add i32* {{.*}} acq_rel, align 4
 #pragma omp atomic capture
   {
     b = a;

diff --git a/clang/test/OpenMP/requires_relaxed_codegen.cpp b/clang/test/OpenMP/requires_relaxed_codegen.cpp
index e92b55e8f137..1c194afa52ce 100644
--- a/clang/test/OpenMP/requires_relaxed_codegen.cpp
+++ b/clang/test/OpenMP/requires_relaxed_codegen.cpp
@@ -16,19 +16,19 @@
 // CHECK-LABEL: foo
 void foo() {
   int a = 0, b = 0;
-// CHECK: load atomic i32,{{.*}}monotonic
+// CHECK: load atomic i32, {{.*}} monotonic, align 4
 #pragma omp atomic read
   a = b;
-// CHECK: store atomic i32{{.*}}monotonic
+// CHECK: store atomic i32 {{.*}} monotonic, align 4
 #pragma omp atomic write
   a = b;
-// CHECK: atomicrmw add i32{{.*}}monotonic
+// CHECK: atomicrmw add i32* {{.*}} monotonic, align 4
 #pragma omp atomic
   a += 1;
-// CHECK: atomicrmw add i32{{.*}}monotonic
+// CHECK: atomicrmw add i32* {{.*}} monotonic, align 4
 #pragma omp atomic update
   a += 1;
-// CHECK: atomicrmw add i32{{.*}}monotonic
+// CHECK: atomicrmw add i32* {{.*}} monotonic, align 4
 #pragma omp atomic capture
   {
     b = a;

diff --git a/clang/test/OpenMP/requires_seq_cst_codegen.cpp b/clang/test/OpenMP/requires_seq_cst_codegen.cpp
index c2f02665d153..ee92f0d69649 100644
--- a/clang/test/OpenMP/requires_seq_cst_codegen.cpp
+++ b/clang/test/OpenMP/requires_seq_cst_codegen.cpp
@@ -16,19 +16,19 @@
 // CHECK-LABEL: foo
 void foo() {
   int a = 0, b = 0;
-// CHECK: load atomic i32,{{.*}}seq_cst
+// CHECK: load atomic i32, {{.*}} seq_cst, align 4
 #pragma omp atomic read
   a = b;
-// CHECK: store atomic i32{{.*}}seq_cst
+// CHECK: store atomic i32 {{.*}} seq_cst, align 4
 #pragma omp atomic write
   a = b;
-// CHECK: atomicrmw add i32{{.*}}seq_cst
+// CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
 #pragma omp atomic
   a += 1;
-// CHECK: atomicrmw add i32{{.*}}seq_cst
+// CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
 #pragma omp atomic update
   a += 1;
-// CHECK: atomicrmw add i32{{.*}}seq_cst
+// CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4
 #pragma omp atomic capture
   {
     b = a;
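
(On the pattern style in these three requires_* tests: FileCheck treats
{{...}} as a regular expression, so a line such as

  // CHECK: atomicrmw add i32* {{.*}} seq_cst, align 4

matches whatever the pointer operand happens to be named while still pinning
down the ordering and alignment at the end of the instruction.)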

diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp
index e016e9fbba04..44e18c929b33 100644
--- a/clang/test/OpenMP/sections_reduction_codegen.cpp
+++ b/clang/test/OpenMP/sections_reduction_codegen.cpp
@@ -93,7 +93,7 @@ int main() {
     // LAMBDA: [[CASE2]]
     // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE_ADDR]]
     // LAMBDA: fadd double
-    // LAMBDA: cmpxchg i64*
+    // LAMBDA: cmpxchg i64* {{.*}}, align 8
     // LAMBDA: call void @__kmpc_end_reduce(
     // LAMBDA: br label %[[REDUCTION_DONE]]
     // LAMBDA: [[REDUCTION_DONE]]
@@ -154,7 +154,7 @@ int main() {
     // BLOCKS: [[CASE2]]
     // BLOCKS: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE_ADDR]]
     // BLOCKS: fadd double
-    // BLOCKS: cmpxchg i64*
+    // BLOCKS: cmpxchg i64* {{.*}}, align 8
     // BLOCKS: call void @__kmpc_end_reduce(
     // BLOCKS: br label %[[REDUCTION_DONE]]
     // BLOCKS: [[REDUCTION_DONE]]
@@ -338,7 +338,7 @@ int main() {
 // case 2:
 // t_var += t_var_reduction;
 // CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]]
-// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic
+// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic, align 4
 
 // var = var.operator &(var_reduction);
 // CHECK: call void @__kmpc_critical(
@@ -368,7 +368,7 @@ int main() {
 
 // t_var1 = min(t_var1, t_var1_reduction);
 // CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]]
-// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic
+// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic, align 4
 
 // break;
 // CHECK: br label %[[RED_DONE]]
@@ -458,4 +458,3 @@ int main() {
 // CHECK: ret void
 
 #endif
-

diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
index 094a68918809..770d10a70529 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp
@@ -89,7 +89,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -136,7 +136,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     sivar += i;
@@ -213,7 +213,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -257,7 +257,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
@@ -304,7 +304,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
@@ -348,6 +348,6 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 #endif

diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
index 2a006775fc54..f832959857f0 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -89,7 +89,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -136,7 +136,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     sivar += i;
@@ -213,7 +213,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -257,7 +257,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
@@ -304,7 +304,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
@@ -348,6 +348,6 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 #endif

diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
index 6f946f57eb83..1fa273b5edb9 100644
--- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp
@@ -91,7 +91,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
     // LAMBDA: br
     sivar += i;
@@ -168,7 +168,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 
@@ -217,7 +217,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 

diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
index f408fdc77e80..a35642c8e28a 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
@@ -91,7 +91,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
     // LAMBDA: br
     sivar += i;
@@ -168,7 +168,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 
@@ -217,7 +217,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 

diff --git a/clang/test/OpenMP/taskloop_with_atomic_codegen.cpp b/clang/test/OpenMP/taskloop_with_atomic_codegen.cpp
index 16c09b68d0cf..7cabcc3377f9 100644
--- a/clang/test/OpenMP/taskloop_with_atomic_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_with_atomic_codegen.cpp
@@ -25,7 +25,7 @@ int main() {
 
 // CHECK: define internal i32 @{{.+}}(
 // Check that the occupanices var is firstprivatized.
-// CHECK-DAG: atomicrmw add i32* [[FP_OCCUP:%.+]], i32 1
+// CHECK-DAG: atomicrmw add i32* [[FP_OCCUP:%.+]], i32 1 monotonic, align 4
 // CHECK-DAG: [[FP_OCCUP]] = load i32*, i32** [[FP_OCCUP_ADDR:%[^,]+]],
 // CHECK-DAG: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, i32** [[FP_OCCUP_ADDR]])
 

diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
index 596a35835993..9f2d8363785d 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp
@@ -92,7 +92,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -138,7 +138,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     sivar += i;
@@ -217,7 +217,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -261,7 +261,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
@@ -309,7 +309,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
@@ -353,6 +353,6 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 #endif

diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
index 9099c1cd50e7..1f3d91b962e0 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp
@@ -92,7 +92,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -138,7 +138,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: br
 
     sivar += i;
@@ -220,7 +220,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
@@ -264,7 +264,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
@@ -312,7 +312,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
@@ -356,7 +356,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: br
 
 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true}

diff --git a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
index d386775079a8..0905c9c129f7 100644
--- a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp
@@ -93,7 +93,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
     // LAMBDA: br
     
@@ -173,7 +173,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 
@@ -223,7 +223,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 

diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
index 70e1fdca5fc5..7ecbeb7bc1c1 100644
--- a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp
@@ -93,7 +93,7 @@ int main() {
     // LAMBDA: br
     // LAMBDA: [[CASE2]]:
     // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+    // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
     // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
     // LAMBDA: br
     
@@ -173,7 +173,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 
@@ -223,7 +223,7 @@ int main() {
 // CHECK: br
 // CHECK: [[CASE2]]:
 // CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
-// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
+// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] monotonic, align {{.+}}
 // CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
 // CHECK: br
 

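For reference, the "monotonic, align {{.+}}" tail these hunks add matches the full textual form of the atomicrmw instruction, which now prints its memory ordering followed by its alignment. A minimal sketch of the kind of source/IR pair the updated checks describe (hypothetical file and function names, reduced from the OpenMP reduction patterns above):

  /* atomic_add.c -- relaxed atomic increment, a C analogue of the
   * '#pragma omp atomic' updates exercised in these tests. */
  #include <stdatomic.h>

  void add_one(_Atomic int *p) {
    atomic_fetch_add_explicit(p, 1, memory_order_relaxed);
  }

  /* Clang lowers the call above to roughly this IR (typed-pointer
   * syntax of this period): C's memory_order_relaxed corresponds to
   * LLVM's 'monotonic' ordering, and the i32 access is 4-byte aligned,
   * which is exactly the tail the updated CHECK lines now verify:
   *
   *   %old = atomicrmw add i32* %p, i32 1 monotonic, align 4
   */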