[llvm-branch-commits] [llvm] 4a2e374 - Revert "[NVPTX] Weak cmpxchg unittests for NVPTX (#176078)"
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 26 12:52:32 PST 2026
Author: Vladimir Vereschaka
Date: 2026-01-26T12:52:28-08:00
New Revision: 4a2e374ec05869267d33981bcfda09250f535ddf
URL: https://github.com/llvm/llvm-project/commit/4a2e374ec05869267d33981bcfda09250f535ddf
DIFF: https://github.com/llvm/llvm-project/commit/4a2e374ec05869267d33981bcfda09250f535ddf.diff
LOG: Revert "[NVPTX] Weak cmpxchg unittests for NVPTX (#176078)"
This reverts commit e936715b8c5e624b5d6cc1e2e50a32c394d03209.
Added:
Modified:
llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
llvm/test/CodeGen/NVPTX/cmpxchg.py
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
index 393c1ceae6916..d895c715ab3ce 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
@@ -1,3878 +1,2104 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | FileCheck %s --check-prefix=SM60
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
+; RUN: %if ptxas-sm_60 && ptxas-isa-5.0 %{ llc < %s -march=nvptx64 -mcpu=sm_60 -mattr=+ptx50 | %ptxas-verify -arch=sm_60 %}
-define i8 @weak_monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_monotonic_monotonic_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i8_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r6, [weak_monotonic_monotonic_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r7, [weak_monotonic_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_monotonic_acquire_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i8_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r6, [weak_monotonic_acquire_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r7, [weak_monotonic_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_monotonic_seq_cst_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_monotonic_seq_cst_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_monotonic_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acquire_monotonic_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i8_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acquire_monotonic_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r7, [weak_acquire_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acquire_acquire_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i8_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acquire_acquire_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r7, [weak_acquire_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acquire_seq_cst_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acquire_seq_cst_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_acquire_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_release_monotonic_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_release_monotonic_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_release_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_release_acquire_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_release_acquire_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_release_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_release_seq_cst_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_release_seq_cst_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_release_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acq_rel_monotonic_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acq_rel_monotonic_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_acq_rel_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acq_rel_seq_cst_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acq_rel_seq_cst_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_acq_rel_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_seq_cst_monotonic_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_seq_cst_monotonic_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_seq_cst_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_seq_cst_acquire_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_seq_cst_acquire_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_seq_cst_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_seq_cst_seq_cst_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i8_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_seq_cst_seq_cst_i8_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_seq_cst_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i16 @weak_monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_monotonic_monotonic_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i16_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r6, [weak_monotonic_monotonic_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r7, [weak_monotonic_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_monotonic_acquire_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i16_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r6, [weak_monotonic_acquire_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r7, [weak_monotonic_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_monotonic_seq_cst_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_monotonic_seq_cst_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_monotonic_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_acquire_monotonic_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i16_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r6, [weak_acquire_monotonic_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r7, [weak_acquire_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_acquire_acquire_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i16_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r6, [weak_acquire_acquire_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r7, [weak_acquire_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_acquire_seq_cst_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_acquire_seq_cst_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_acquire_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_release_monotonic_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_release_monotonic_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_release_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_release_acquire_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_release_acquire_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_release_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_release_seq_cst_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_release_seq_cst_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_release_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_acq_rel_monotonic_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_acq_rel_monotonic_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_acq_rel_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_acq_rel_acquire_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_acq_rel_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_acq_rel_seq_cst_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_acq_rel_seq_cst_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_acq_rel_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_seq_cst_monotonic_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_seq_cst_monotonic_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_seq_cst_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_seq_cst_acquire_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_seq_cst_acquire_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_seq_cst_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: weak_seq_cst_seq_cst_i16_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i16_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r6, [weak_seq_cst_seq_cst_i16_global_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [weak_seq_cst_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 65535;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i32 @weak_monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_monotonic_monotonic_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_monotonic_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_monotonic_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_monotonic_monotonic_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_monotonic_acquire_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_monotonic_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_monotonic_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_monotonic_acquire_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_monotonic_seq_cst_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_monotonic_seq_cst_i32_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [weak_monotonic_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_monotonic_seq_cst_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acquire_monotonic_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acquire_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acquire_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acquire_monotonic_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acquire_acquire_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acquire_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acquire_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acquire_acquire_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acquire_seq_cst_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acquire_seq_cst_i32_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [weak_acquire_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acquire_seq_cst_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_release_monotonic_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_release_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_release_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_release_monotonic_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_release_acquire_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_release_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_release_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_release_acquire_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_release_seq_cst_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_release_seq_cst_i32_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [weak_release_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_release_seq_cst_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_monotonic_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_monotonic_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_seq_cst_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_seq_cst_i32_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_seq_cst_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_seq_cst_monotonic_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_seq_cst_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [weak_seq_cst_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_seq_cst_monotonic_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_seq_cst_acquire_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_seq_cst_acquire_i32_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [weak_seq_cst_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_seq_cst_acquire_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_seq_cst_seq_cst_i32_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_seq_cst_seq_cst_i32_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [weak_seq_cst_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_seq_cst_seq_cst_i32_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i64 @weak_monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_monotonic_monotonic_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_monotonic_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_monotonic_monotonic_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_monotonic_acquire_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_monotonic_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_monotonic_acquire_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_monotonic_seq_cst_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_monotonic_seq_cst_i64_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_monotonic_seq_cst_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_acquire_monotonic_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acquire_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_acquire_monotonic_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_acquire_acquire_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acquire_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_acquire_acquire_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_acquire_seq_cst_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acquire_seq_cst_i64_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_acquire_seq_cst_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_release_monotonic_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_release_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_release_monotonic_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_release_acquire_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_release_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_release_acquire_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_release_seq_cst_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_release_seq_cst_i64_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_release_seq_cst_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_acq_rel_monotonic_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_acq_rel_monotonic_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_acq_rel_acquire_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_acq_rel_seq_cst_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_seq_cst_i64_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_acq_rel_seq_cst_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_seq_cst_monotonic_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_seq_cst_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_seq_cst_monotonic_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_seq_cst_acquire_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_seq_cst_acquire_i64_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_seq_cst_acquire_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: weak_seq_cst_seq_cst_i64_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .b64 %rd<5>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_seq_cst_seq_cst_i64_global_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [weak_seq_cst_seq_cst_i64_global_cta_param_2];
-; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i8_global(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_global_param_0];
-; SM60-NEXT: membar.sys;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_global_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_global_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.sys;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i32_global(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_param_2];
-; SM60-NEXT: atom.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i32_global_sys(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_sys_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_sys_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_sys_param_2];
-; SM60-NEXT: atom.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i32_global_gpu(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_gpu_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_gpu_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_gpu_param_2];
-; SM60-NEXT: atom.gpu.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i8_generic_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<17>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_generic_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_generic_cta_param_1];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_generic_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM60-NEXT: and.b32 %r9, %r8, 3;
-; SM60-NEXT: shl.b32 %r1, %r9, 3;
-; SM60-NEXT: mov.b32 %r10, 255;
-; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i8_shared_cta(
+define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: monotonic_monotonic_i8_global_cta(
; SM60: {
+; SM60-NEXT: .reg .pred %p<3>;
+; SM60-NEXT: .reg .b16 %rs<2>;
; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_shared_cta_param_0];
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_shared_cta_param_1];
+; SM60-NEXT: ld.param.b8 %rs1, [monotonic_monotonic_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i8_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_shared_cta_param_2];
+; SM60-NEXT: ld.param.b8 %r7, [monotonic_monotonic_i8_global_cta_param_1];
; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
; SM60-NEXT: and.b32 %r9, %r8, 3;
; SM60-NEXT: shl.b32 %r1, %r9, 3;
; SM60-NEXT: mov.b32 %r10, 255;
; SM60-NEXT: shl.b32 %r11, %r10, %r1;
-; SM60-NEXT: not.b32 %r12, %r11;
-; SM60-NEXT: shl.b32 %r2, %r7, %r1;
-; SM60-NEXT: shl.b32 %r3, %r6, %r1;
-; SM60-NEXT: ld.shared.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r4, %r13, %r12;
-; SM60-NEXT: or.b32 %r14, %r4, %r2;
-; SM60-NEXT: or.b32 %r15, %r4, %r3;
-; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i32_generic_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_generic_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_generic_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_generic_cta_param_2];
-; SM60-NEXT: atom.cta.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: weak_acq_rel_acquire_i32_shared_cta(
-; SM60: {
-; SM60-NEXT: .reg .b32 %r<4>;
-; SM60-NEXT: .reg .b64 %rd<2>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_shared_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_shared_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_shared_cta_param_2];
-; SM60-NEXT: atom.cta.shared.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
-; SM60-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i8 @strong_monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_monotonic_monotonic_i8_global_cta(
-; SM60: {
-; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
-; SM60-NEXT: .reg .b64 %rd<3>;
-; SM60-EMPTY:
-; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i8_global_cta_param_0];
-; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [strong_monotonic_monotonic_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r8, [strong_monotonic_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB68_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB68_3;
+; SM60-NEXT: @%p1 bra $L__BB0_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB68_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB68_1;
-; SM60-NEXT: $L__BB68_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB0_1;
+; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
+ ret i8 %new
}
-define i8 @strong_monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_monotonic_acquire_i8_global_cta(
+define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: monotonic_acquire_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [monotonic_acquire_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i8_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [strong_monotonic_acquire_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r8, [strong_monotonic_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: ld.param.b8 %r7, [monotonic_acquire_i8_global_cta_param_1];
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB69_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB69_3;
+; SM60-NEXT: @%p1 bra $L__BB1_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB69_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB69_1;
-; SM60-NEXT: $L__BB69_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB1_1;
+; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
+ ret i8 %new
}
-define i8 @strong_monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_monotonic_seq_cst_i8_global_cta(
+define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: monotonic_seq_cst_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [monotonic_seq_cst_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_monotonic_seq_cst_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [monotonic_seq_cst_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_monotonic_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB70_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB70_3;
+; SM60-NEXT: @%p1 bra $L__BB2_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB70_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB70_1;
-; SM60-NEXT: $L__BB70_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB2_1;
+; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
+ ret i8 %new
}
-define i8 @strong_acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acquire_monotonic_i8_global_cta(
+define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acquire_monotonic_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acquire_monotonic_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i8_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acquire_monotonic_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r8, [strong_acquire_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: ld.param.b8 %r7, [acquire_monotonic_i8_global_cta_param_1];
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB71_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB71_3;
+; SM60-NEXT: @%p1 bra $L__BB3_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB71_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB71_1;
-; SM60-NEXT: $L__BB71_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB3_1;
+; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
+ ret i8 %new
}
-define i8 @strong_acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acquire_acquire_i8_global_cta(
+define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acquire_acquire_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acquire_acquire_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_acquire_i8_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acquire_acquire_i8_global_cta_param_1];
-; SM60-NEXT: ld.param.b8 %r8, [strong_acquire_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: ld.param.b8 %r7, [acquire_acquire_i8_global_cta_param_1];
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB72_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB72_3;
+; SM60-NEXT: @%p1 bra $L__BB4_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB72_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB72_1;
-; SM60-NEXT: $L__BB72_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB4_1;
+; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
+ ret i8 %new
}
-define i8 @strong_acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acquire_seq_cst_i8_global_cta(
+define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acquire_seq_cst_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acquire_seq_cst_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acquire_seq_cst_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [acquire_seq_cst_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_acquire_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB73_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB5_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB73_3;
+; SM60-NEXT: @%p1 bra $L__BB5_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB73_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB73_1;
-; SM60-NEXT: $L__BB73_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB5_1;
+; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
+ ret i8 %new
}
-define i8 @strong_release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_release_monotonic_i8_global_cta(
+define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: release_monotonic_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [release_monotonic_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [release_monotonic_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_release_monotonic_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [release_monotonic_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_release_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB74_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB6_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB74_3;
+; SM60-NEXT: @%p1 bra $L__BB6_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB74_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB74_1;
-; SM60-NEXT: $L__BB74_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB6_1;
+; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
+ ret i8 %new
}
-define i8 @strong_release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_release_acquire_i8_global_cta(
+define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: release_acquire_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [release_acquire_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [release_acquire_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_release_acquire_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [release_acquire_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_release_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB75_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB7_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB75_3;
+; SM60-NEXT: @%p1 bra $L__BB7_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB75_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB75_1;
-; SM60-NEXT: $L__BB75_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB7_1;
+; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
+ ret i8 %new
}
-define i8 @strong_release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_release_seq_cst_i8_global_cta(
+define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: release_seq_cst_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [release_seq_cst_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [release_seq_cst_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_release_seq_cst_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [release_seq_cst_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_release_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB76_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB8_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB76_3;
+; SM60-NEXT: @%p1 bra $L__BB8_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB76_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB76_1;
-; SM60-NEXT: $L__BB76_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB8_1;
+; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
+ ret i8 %new
}
-define i8 @strong_acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acq_rel_monotonic_i8_global_cta(
+define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acq_rel_monotonic_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_monotonic_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acq_rel_monotonic_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [acq_rel_monotonic_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_acq_rel_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB77_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB9_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB77_3;
+; SM60-NEXT: @%p1 bra $L__BB9_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB77_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB77_1;
-; SM60-NEXT: $L__BB77_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB9_1;
+; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
+ ret i8 %new
}
-define i8 @strong_acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i8_global_cta(
+define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acq_rel_acquire_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB78_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB10_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB78_3;
+; SM60-NEXT: @%p1 bra $L__BB10_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB78_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB78_1;
-; SM60-NEXT: $L__BB78_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB10_1;
+; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i8 @strong_acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acq_rel_seq_cst_i8_global_cta(
+define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acq_rel_seq_cst_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_seq_cst_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acq_rel_seq_cst_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [acq_rel_seq_cst_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_acq_rel_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB79_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB11_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB79_3;
+; SM60-NEXT: @%p1 bra $L__BB11_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB79_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB79_1;
-; SM60-NEXT: $L__BB79_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB11_1;
+; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
+ ret i8 %new
}
-define i8 @strong_seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_seq_cst_monotonic_i8_global_cta(
+define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: seq_cst_monotonic_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [seq_cst_monotonic_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_seq_cst_monotonic_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [seq_cst_monotonic_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_seq_cst_monotonic_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB80_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB12_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB80_3;
+; SM60-NEXT: @%p1 bra $L__BB12_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB80_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB80_1;
-; SM60-NEXT: $L__BB80_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB12_1;
+; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
+ ret i8 %new
}
-define i8 @strong_seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_seq_cst_acquire_i8_global_cta(
+define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: seq_cst_acquire_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [seq_cst_acquire_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_seq_cst_acquire_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [seq_cst_acquire_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_seq_cst_acquire_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB81_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB13_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB81_3;
+; SM60-NEXT: @%p1 bra $L__BB13_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB81_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB81_1;
-; SM60-NEXT: $L__BB81_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB13_1;
+; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
+ ret i8 %new
}
-define i8 @strong_seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_seq_cst_seq_cst_i8_global_cta(
+define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: seq_cst_seq_cst_i8_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i8_global_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [seq_cst_seq_cst_i8_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i8_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_seq_cst_seq_cst_i8_global_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [seq_cst_seq_cst_i8_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_seq_cst_seq_cst_i8_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB82_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB14_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB82_3;
+; SM60-NEXT: @%p1 bra $L__BB14_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB82_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB82_1;
-; SM60-NEXT: $L__BB82_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB14_1;
+; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
+ ret i8 %new
}
-define i16 @strong_monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_monotonic_monotonic_i16_global_cta(
+define i16 @monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: monotonic_monotonic_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [monotonic_monotonic_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i16_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [strong_monotonic_monotonic_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r8, [strong_monotonic_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: ld.param.b16 %r7, [monotonic_monotonic_i16_global_cta_param_1];
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB83_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB15_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB83_3;
+; SM60-NEXT: @%p1 bra $L__BB15_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB83_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB83_1;
-; SM60-NEXT: $L__BB83_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB15_1;
+; SM60-NEXT: $L__BB15_3: // %partword.cmpxchg.end
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
+ ret i16 %new
}
-define i16 @strong_monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_monotonic_acquire_i16_global_cta(
+define i16 @monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: monotonic_acquire_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [monotonic_acquire_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i16_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [strong_monotonic_acquire_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r8, [strong_monotonic_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: ld.param.b16 %r7, [monotonic_acquire_i16_global_cta_param_1];
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB84_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB16_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB84_3;
+; SM60-NEXT: @%p1 bra $L__BB16_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB84_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB84_1;
-; SM60-NEXT: $L__BB84_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB16_1;
+; SM60-NEXT: $L__BB16_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
+ ret i16 %new
}
-define i16 @strong_monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_monotonic_seq_cst_i16_global_cta(
+define i16 @monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: monotonic_seq_cst_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [monotonic_seq_cst_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_monotonic_seq_cst_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [monotonic_seq_cst_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_monotonic_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB85_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB17_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB85_3;
+; SM60-NEXT: @%p1 bra $L__BB17_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB85_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB85_1;
-; SM60-NEXT: $L__BB85_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB17_1;
+; SM60-NEXT: $L__BB17_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
+ ret i16 %new
}
-define i16 @strong_acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_acquire_monotonic_i16_global_cta(
+define i16 @acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: acquire_monotonic_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [acquire_monotonic_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i16_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [strong_acquire_monotonic_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r8, [strong_acquire_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: ld.param.b16 %r7, [acquire_monotonic_i16_global_cta_param_1];
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB86_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB18_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB86_3;
+; SM60-NEXT: @%p1 bra $L__BB18_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB86_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB86_1;
-; SM60-NEXT: $L__BB86_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB18_1;
+; SM60-NEXT: $L__BB18_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
+ ret i16 %new
}
-define i16 @strong_acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_acquire_acquire_i16_global_cta(
+define i16 @acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: acquire_acquire_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [acquire_acquire_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_acquire_i16_global_cta_param_0];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r7, [strong_acquire_acquire_i16_global_cta_param_1];
-; SM60-NEXT: ld.param.b16 %r8, [strong_acquire_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: ld.param.b16 %r7, [acquire_acquire_i16_global_cta_param_1];
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB87_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB19_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB87_3;
+; SM60-NEXT: @%p1 bra $L__BB19_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB87_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB87_1;
-; SM60-NEXT: $L__BB87_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB19_1;
+; SM60-NEXT: $L__BB19_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
+ ret i16 %new
}
-define i16 @strong_acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_acquire_seq_cst_i16_global_cta(
+define i16 @acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: acquire_seq_cst_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [acquire_seq_cst_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_acquire_seq_cst_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [acquire_seq_cst_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_acquire_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB88_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB20_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB88_3;
+; SM60-NEXT: @%p1 bra $L__BB20_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB88_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB88_1;
-; SM60-NEXT: $L__BB88_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB20_1;
+; SM60-NEXT: $L__BB20_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
+ ret i16 %new
}
-define i16 @strong_release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_release_monotonic_i16_global_cta(
+define i16 @release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: release_monotonic_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [release_monotonic_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [release_monotonic_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_release_monotonic_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [release_monotonic_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_release_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB89_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB21_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB89_3;
+; SM60-NEXT: @%p1 bra $L__BB21_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB89_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB89_1;
-; SM60-NEXT: $L__BB89_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB21_1;
+; SM60-NEXT: $L__BB21_3: // %partword.cmpxchg.end
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
+ ret i16 %new
}
-define i16 @strong_release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_release_acquire_i16_global_cta(
+define i16 @release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: release_acquire_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [release_acquire_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [release_acquire_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_release_acquire_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [release_acquire_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_release_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB90_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB22_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB90_3;
+; SM60-NEXT: @%p1 bra $L__BB22_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB90_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB90_1;
-; SM60-NEXT: $L__BB90_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB22_1;
+; SM60-NEXT: $L__BB22_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
+ ret i16 %new
}
-define i16 @strong_release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_release_seq_cst_i16_global_cta(
+define i16 @release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: release_seq_cst_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [release_seq_cst_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [release_seq_cst_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_release_seq_cst_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [release_seq_cst_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_release_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB91_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB23_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB91_3;
+; SM60-NEXT: @%p1 bra $L__BB23_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB91_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB91_1;
-; SM60-NEXT: $L__BB91_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB23_1;
+; SM60-NEXT: $L__BB23_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
+ ret i16 %new
}
-define i16 @strong_acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_acq_rel_monotonic_i16_global_cta(
+define i16 @acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: acq_rel_monotonic_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [acq_rel_monotonic_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_acq_rel_monotonic_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [acq_rel_monotonic_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_acq_rel_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB92_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB24_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB92_3;
+; SM60-NEXT: @%p1 bra $L__BB24_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB92_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB92_1;
-; SM60-NEXT: $L__BB92_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB24_1;
+; SM60-NEXT: $L__BB24_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
+ ret i16 %new
}
-define i16 @strong_acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i16_global_cta(
+define i16 @acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: acq_rel_acquire_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [acq_rel_acquire_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_acq_rel_acquire_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [acq_rel_acquire_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_acq_rel_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB93_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB25_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB93_3;
+; SM60-NEXT: @%p1 bra $L__BB25_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB93_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB93_1;
-; SM60-NEXT: $L__BB93_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB25_1;
+; SM60-NEXT: $L__BB25_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
+ ret i16 %new
}
-define i16 @strong_acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_acq_rel_seq_cst_i16_global_cta(
+define i16 @acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: acq_rel_seq_cst_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [acq_rel_seq_cst_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_acq_rel_seq_cst_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [acq_rel_seq_cst_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_acq_rel_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB94_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB26_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB94_3;
+; SM60-NEXT: @%p1 bra $L__BB26_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB94_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB94_1;
-; SM60-NEXT: $L__BB94_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB26_1;
+; SM60-NEXT: $L__BB26_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
+ ret i16 %new
}
-define i16 @strong_seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_seq_cst_monotonic_i16_global_cta(
+define i16 @seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: seq_cst_monotonic_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [seq_cst_monotonic_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_seq_cst_monotonic_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [seq_cst_monotonic_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_seq_cst_monotonic_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB95_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB27_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB95_3;
+; SM60-NEXT: @%p1 bra $L__BB27_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB95_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB95_1;
-; SM60-NEXT: $L__BB95_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB27_1;
+; SM60-NEXT: $L__BB27_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
+ ret i16 %new
}
-define i16 @strong_seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_seq_cst_acquire_i16_global_cta(
+define i16 @seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: seq_cst_acquire_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [seq_cst_acquire_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_seq_cst_acquire_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [seq_cst_acquire_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_seq_cst_acquire_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB96_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB28_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB96_3;
+; SM60-NEXT: @%p1 bra $L__BB28_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB96_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB96_1;
-; SM60-NEXT: $L__BB96_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB28_1;
+; SM60-NEXT: $L__BB28_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
+ ret i16 %new
}
-define i16 @strong_seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM60-LABEL: strong_seq_cst_seq_cst_i16_global_cta(
+define i16 @seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM60-LABEL: seq_cst_seq_cst_i16_global_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i16_global_cta_param_0];
+; SM60-NEXT: ld.param.b16 %rs1, [seq_cst_seq_cst_i16_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i16_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b16 %r7, [strong_seq_cst_seq_cst_i16_global_cta_param_1];
+; SM60-NEXT: ld.param.b16 %r7, [seq_cst_seq_cst_i16_global_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b16 %r8, [strong_seq_cst_seq_cst_i16_global_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 65535;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 65535;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB97_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB29_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB97_3;
+; SM60-NEXT: @%p1 bra $L__BB29_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB97_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB97_1;
-; SM60-NEXT: $L__BB97_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB29_1;
+; SM60-NEXT: $L__BB29_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
+ ret i16 %new
}
-define i32 @strong_monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_monotonic_monotonic_i32_global_cta(
+define i32 @monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: monotonic_monotonic_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_monotonic_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_monotonic_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_monotonic_monotonic_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [monotonic_monotonic_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [monotonic_monotonic_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
+ ret i32 %new
}
-define i32 @strong_monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_monotonic_acquire_i32_global_cta(
+define i32 @monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: monotonic_acquire_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_monotonic_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_monotonic_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_monotonic_acquire_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [monotonic_acquire_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [monotonic_acquire_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
+ ret i32 %new
}
-define i32 @strong_monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_monotonic_seq_cst_i32_global_cta(
+define i32 @monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: monotonic_seq_cst_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_monotonic_seq_cst_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i32_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [strong_monotonic_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_monotonic_seq_cst_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b32 %r1, [monotonic_seq_cst_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [monotonic_seq_cst_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
+ ret i32 %new
}
-define i32 @strong_acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acquire_monotonic_i32_global_cta(
+define i32 @acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acquire_monotonic_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acquire_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acquire_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acquire_monotonic_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acquire_monotonic_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acquire_monotonic_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
+ ret i32 %new
}
-define i32 @strong_acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acquire_acquire_i32_global_cta(
+define i32 @acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acquire_acquire_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acquire_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acquire_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acquire_acquire_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acquire_acquire_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acquire_acquire_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acquire_acquire_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
+ ret i32 %new
}
-define i32 @strong_acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acquire_seq_cst_i32_global_cta(
+define i32 @acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acquire_seq_cst_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acquire_seq_cst_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i32_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [strong_acquire_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acquire_seq_cst_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b32 %r1, [acquire_seq_cst_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acquire_seq_cst_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
+ ret i32 %new
}
-define i32 @strong_release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_release_monotonic_i32_global_cta(
+define i32 @release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: release_monotonic_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_release_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_release_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_release_monotonic_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [release_monotonic_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [release_monotonic_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [release_monotonic_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
+ ret i32 %new
}
-define i32 @strong_release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_release_acquire_i32_global_cta(
+define i32 @release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: release_acquire_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_release_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_release_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_release_acquire_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [release_acquire_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [release_acquire_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [release_acquire_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
+ ret i32 %new
}
-define i32 @strong_release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_release_seq_cst_i32_global_cta(
+define i32 @release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: release_seq_cst_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_release_seq_cst_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [release_seq_cst_i32_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [strong_release_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_release_seq_cst_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b32 %r1, [release_seq_cst_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [release_seq_cst_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
+ ret i32 %new
}
-define i32 @strong_acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_monotonic_i32_global_cta(
+define i32 @acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_monotonic_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_monotonic_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_monotonic_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_monotonic_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_monotonic_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i32_global_cta(
+define i32 @acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_acquire_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_seq_cst_i32_global_cta(
+define i32 @acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_seq_cst_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_seq_cst_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i32_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_seq_cst_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_seq_cst_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_seq_cst_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
+ ret i32 %new
}
-define i32 @strong_seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_seq_cst_monotonic_i32_global_cta(
+define i32 @seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: seq_cst_monotonic_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_seq_cst_monotonic_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i32_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [strong_seq_cst_monotonic_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_seq_cst_monotonic_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b32 %r1, [seq_cst_monotonic_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [seq_cst_monotonic_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
+ ret i32 %new
}
-define i32 @strong_seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_seq_cst_acquire_i32_global_cta(
+define i32 @seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: seq_cst_acquire_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_seq_cst_acquire_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i32_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [strong_seq_cst_acquire_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_seq_cst_acquire_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b32 %r1, [seq_cst_acquire_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [seq_cst_acquire_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
+ ret i32 %new
}
-define i32 @strong_seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_seq_cst_seq_cst_i32_global_cta(
+define i32 @seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: seq_cst_seq_cst_i32_global_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_seq_cst_seq_cst_i32_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i32_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b32 %r1, [strong_seq_cst_seq_cst_i32_global_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_seq_cst_seq_cst_i32_global_cta_param_2];
+; SM60-NEXT: ld.param.b32 %r1, [seq_cst_seq_cst_i32_global_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [seq_cst_seq_cst_i32_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
+ ret i32 %new
}
-define i64 @strong_monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_monotonic_monotonic_i64_global_cta(
+define i64 @monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: monotonic_monotonic_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_monotonic_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_monotonic_monotonic_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [monotonic_monotonic_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
+ ret i64 %new
}
-define i64 @strong_monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_monotonic_acquire_i64_global_cta(
+define i64 @monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: monotonic_acquire_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_monotonic_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_monotonic_acquire_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [monotonic_acquire_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
+ ret i64 %new
}
-define i64 @strong_monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_monotonic_seq_cst_i64_global_cta(
+define i64 @monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: monotonic_seq_cst_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_monotonic_seq_cst_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i64_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_monotonic_seq_cst_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [monotonic_seq_cst_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
+ ret i64 %new
}
-define i64 @strong_acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_acquire_monotonic_i64_global_cta(
+define i64 @acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: acquire_monotonic_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acquire_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_acquire_monotonic_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [acquire_monotonic_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
+ ret i64 %new
}
-define i64 @strong_acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_acquire_acquire_i64_global_cta(
+define i64 @acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: acquire_acquire_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acquire_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_acquire_acquire_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acquire_acquire_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_acquire_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [acquire_acquire_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
+ ret i64 %new
}
-define i64 @strong_acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_acquire_seq_cst_i64_global_cta(
+define i64 @acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: acquire_seq_cst_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acquire_seq_cst_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i64_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_acquire_seq_cst_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [acquire_seq_cst_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
+ ret i64 %new
}
-define i64 @strong_release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_release_monotonic_i64_global_cta(
+define i64 @release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: release_monotonic_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_release_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_release_monotonic_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [release_monotonic_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [release_monotonic_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [release_monotonic_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
+ ret i64 %new
}
-define i64 @strong_release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_release_acquire_i64_global_cta(
+define i64 @release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: release_acquire_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_release_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_release_acquire_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [release_acquire_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [release_acquire_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [release_acquire_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
+ ret i64 %new
}
-define i64 @strong_release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_release_seq_cst_i64_global_cta(
+define i64 @release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: release_seq_cst_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_release_seq_cst_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [release_seq_cst_i64_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_release_seq_cst_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [release_seq_cst_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [release_seq_cst_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
+ ret i64 %new
}
-define i64 @strong_acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_acq_rel_monotonic_i64_global_cta(
+define i64 @acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: acq_rel_monotonic_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_monotonic_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_acq_rel_monotonic_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [acq_rel_monotonic_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
+ ret i64 %new
}
-define i64 @strong_acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i64_global_cta(
+define i64 @acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: acq_rel_acquire_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i64_global_cta_param_0];
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_acq_rel_acquire_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [acq_rel_acquire_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
+ ret i64 %new
}
-define i64 @strong_acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_acq_rel_seq_cst_i64_global_cta(
+define i64 @acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: acq_rel_seq_cst_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_seq_cst_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i64_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_acq_rel_seq_cst_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [acq_rel_seq_cst_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
+ ret i64 %new
}
-define i64 @strong_seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_seq_cst_monotonic_i64_global_cta(
+define i64 @seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: seq_cst_monotonic_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_seq_cst_monotonic_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i64_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_seq_cst_monotonic_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [seq_cst_monotonic_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
+ ret i64 %new
}
-define i64 @strong_seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_seq_cst_acquire_i64_global_cta(
+define i64 @seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: seq_cst_acquire_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_seq_cst_acquire_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i64_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_seq_cst_acquire_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [seq_cst_acquire_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
+ ret i64 %new
}
-define i64 @strong_seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM60-LABEL: strong_seq_cst_seq_cst_i64_global_cta(
+define i64 @seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM60-LABEL: seq_cst_seq_cst_i64_global_cta(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<5>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_seq_cst_seq_cst_i64_global_cta_param_0];
+; SM60-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i64_global_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i64_global_cta_param_1];
-; SM60-NEXT: ld.param.b64 %rd3, [strong_seq_cst_seq_cst_i64_global_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i64_global_cta_param_1];
+; SM60-NEXT: ld.param.b64 %rd3, [seq_cst_seq_cst_i64_global_cta_param_2];
; SM60-NEXT: atom.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM60-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM60-NEXT: st.param.b64 [func_retval0], %rd3;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
+ ret i64 %new
}
-define i8 @strong_acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i8_global(
+define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acq_rel_acquire_i8_global(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_global_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_param_0];
; SM60-NEXT: membar.sys;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_global_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_global_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_global_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.global.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB128_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB60_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB128_3;
+; SM60-NEXT: @%p1 bra $L__BB60_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB128_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB128_1;
-; SM60-NEXT: $L__BB128_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB60_1;
+; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
+ ret i8 %new
}
-define i32 @strong_acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i32_global(
+define i32 @acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_acquire_i32_global(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_param_2];
; SM60-NEXT: atom.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i32_global_sys(
+define i32 @acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_acquire_i32_global_sys(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_sys_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_sys_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_sys_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_sys_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_sys_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_sys_param_2];
; SM60-NEXT: atom.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i32_global_gpu(
+define i32 @acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_acquire_i32_global_gpu(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_gpu_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_gpu_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_gpu_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_gpu_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_gpu_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_gpu_param_2];
; SM60-NEXT: atom.gpu.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
+ ret i32 %new
}
-define i8 @strong_acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i8_generic_cta(
+define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acq_rel_acquire_i8_generic_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_generic_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_generic_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_generic_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_generic_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_generic_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_generic_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB132_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB64_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB132_3;
+; SM60-NEXT: @%p1 bra $L__BB64_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB132_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB132_1;
-; SM60-NEXT: $L__BB132_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB64_1;
+; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i8 @strong_acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i8_shared_cta(
+define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
+; SM60-LABEL: acq_rel_acquire_i8_shared_cta(
; SM60: {
; SM60-NEXT: .reg .pred %p<3>;
-; SM60-NEXT: .reg .b32 %r<18>;
+; SM60-NEXT: .reg .b16 %rs<2>;
+; SM60-NEXT: .reg .b32 %r<17>;
; SM60-NEXT: .reg .b64 %rd<3>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_shared_cta_param_0];
+; SM60-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_shared_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_shared_cta_param_0];
; SM60-NEXT: membar.cta;
-; SM60-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_shared_cta_param_1];
+; SM60-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_shared_cta_param_1];
; SM60-NEXT: and.b64 %rd1, %rd2, -4;
-; SM60-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_shared_cta_param_2];
-; SM60-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM60-NEXT: and.b32 %r10, %r9, 3;
-; SM60-NEXT: shl.b32 %r1, %r10, 3;
-; SM60-NEXT: mov.b32 %r11, 255;
-; SM60-NEXT: shl.b32 %r12, %r11, %r1;
-; SM60-NEXT: not.b32 %r2, %r12;
-; SM60-NEXT: shl.b32 %r3, %r8, %r1;
+; SM60-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM60-NEXT: and.b32 %r9, %r8, 3;
+; SM60-NEXT: shl.b32 %r1, %r9, 3;
+; SM60-NEXT: mov.b32 %r10, 255;
+; SM60-NEXT: shl.b32 %r11, %r10, %r1;
+; SM60-NEXT: not.b32 %r2, %r11;
+; SM60-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM60-NEXT: shl.b32 %r3, %r12, %r1;
; SM60-NEXT: shl.b32 %r4, %r7, %r1;
; SM60-NEXT: ld.shared.b32 %r13, [%rd1];
-; SM60-NEXT: and.b32 %r17, %r13, %r2;
-; SM60-NEXT: $L__BB133_1: // %partword.cmpxchg.loop
+; SM60-NEXT: and.b32 %r16, %r13, %r2;
+; SM60-NEXT: $L__BB65_1: // %partword.cmpxchg.loop
; SM60-NEXT: // =>This Inner Loop Header: Depth=1
-; SM60-NEXT: or.b32 %r14, %r17, %r3;
-; SM60-NEXT: or.b32 %r15, %r17, %r4;
+; SM60-NEXT: or.b32 %r14, %r16, %r3;
+; SM60-NEXT: or.b32 %r15, %r16, %r4;
; SM60-NEXT: atom.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
; SM60-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM60-NEXT: @%p1 bra $L__BB133_3;
+; SM60-NEXT: @%p1 bra $L__BB65_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM60-NEXT: // in Loop: Header=BB133_1 Depth=1
+; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM60-NEXT: and.b32 %r6, %r5, %r2;
-; SM60-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM60-NEXT: mov.b32 %r17, %r6;
-; SM60-NEXT: @%p2 bra $L__BB133_1;
-; SM60-NEXT: $L__BB133_3: // %partword.cmpxchg.end
-; SM60-NEXT: shr.u32 %r16, %r5, %r1;
+; SM60-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM60-NEXT: mov.b32 %r16, %r6;
+; SM60-NEXT: @%p2 bra $L__BB65_1;
+; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.cta;
-; SM60-NEXT: st.param.b32 [func_retval0], %r16;
+; SM60-NEXT: st.param.b32 [func_retval0], %r12;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i32 @strong_acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i32_generic_cta(
+define i32 @acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_acquire_i32_generic_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_generic_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_generic_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_generic_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_generic_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_generic_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_generic_cta_param_2];
; SM60-NEXT: atom.cta.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
-; SM60-LABEL: strong_acq_rel_acquire_i32_shared_cta(
+define i32 @acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
+; SM60-LABEL: acq_rel_acquire_i32_shared_cta(
; SM60: {
; SM60-NEXT: .reg .b32 %r<4>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
-; SM60-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_shared_cta_param_0];
-; SM60-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_shared_cta_param_1];
-; SM60-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_shared_cta_param_2];
+; SM60-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_shared_cta_param_0];
+; SM60-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_shared_cta_param_1];
+; SM60-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_shared_cta_param_2];
; SM60-NEXT: atom.cta.shared.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM60-NEXT: st.param.b32 [func_retval0], %r3;
+; SM60-NEXT: st.param.b32 [func_retval0], %r2;
; SM60-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
index be3a81dea77c6..76220ee3a3996 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
@@ -1,3878 +1,2104 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s --check-prefix=SM70
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
+; RUN: %if ptxas-sm_70 && ptxas-isa-6.3 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify -arch=sm_70 %}
-define i8 @weak_monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_monotonic_monotonic_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i8_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r6, [weak_monotonic_monotonic_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r7, [weak_monotonic_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_monotonic_acquire_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i8_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r6, [weak_monotonic_acquire_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r7, [weak_monotonic_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_monotonic_seq_cst_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i8_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_monotonic_seq_cst_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_monotonic_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acquire_monotonic_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i8_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acquire_monotonic_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r7, [weak_acquire_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acquire_acquire_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i8_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acquire_acquire_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r7, [weak_acquire_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acquire_seq_cst_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i8_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acquire_seq_cst_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_acquire_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_release_monotonic_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i8_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_release_monotonic_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_release_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_release_acquire_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i8_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_release_acquire_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_release_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_release_seq_cst_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i8_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_release_seq_cst_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_release_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acq_rel_monotonic_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i8_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acq_rel_monotonic_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_acq_rel_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acq_rel_seq_cst_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i8_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acq_rel_seq_cst_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_acq_rel_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_seq_cst_monotonic_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i8_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_seq_cst_monotonic_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_seq_cst_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_seq_cst_acquire_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i8_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_seq_cst_acquire_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_seq_cst_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_seq_cst_seq_cst_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i8_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_seq_cst_seq_cst_i8_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_seq_cst_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i16 @weak_monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_monotonic_monotonic_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i16_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r6, [weak_monotonic_monotonic_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r7, [weak_monotonic_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_monotonic_acquire_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i16_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r6, [weak_monotonic_acquire_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r7, [weak_monotonic_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_monotonic_seq_cst_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i16_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_monotonic_seq_cst_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_monotonic_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_acquire_monotonic_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i16_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r6, [weak_acquire_monotonic_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r7, [weak_acquire_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_acquire_acquire_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i16_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r6, [weak_acquire_acquire_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r7, [weak_acquire_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_acquire_seq_cst_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i16_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_acquire_seq_cst_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_acquire_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_release_monotonic_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i16_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_release_monotonic_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_release_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_release_acquire_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i16_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_release_acquire_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_release_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_release_seq_cst_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i16_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_release_seq_cst_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_release_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_acq_rel_monotonic_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i16_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_acq_rel_monotonic_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_acq_rel_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i16_global_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_acq_rel_acquire_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_acq_rel_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_acq_rel_seq_cst_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i16_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_acq_rel_seq_cst_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_acq_rel_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_seq_cst_monotonic_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i16_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_seq_cst_monotonic_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_seq_cst_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_seq_cst_acquire_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i16_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_seq_cst_acquire_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_seq_cst_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: weak_seq_cst_seq_cst_i16_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i16_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r6, [weak_seq_cst_seq_cst_i16_global_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [weak_seq_cst_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 65535;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i32 @weak_monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_monotonic_monotonic_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_monotonic_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_monotonic_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_monotonic_monotonic_i32_global_cta_param_2];
-; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_monotonic_acquire_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_monotonic_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_monotonic_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_monotonic_acquire_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_monotonic_seq_cst_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_monotonic_seq_cst_i32_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [weak_monotonic_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_monotonic_seq_cst_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acquire_monotonic_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acquire_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acquire_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acquire_monotonic_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acquire_acquire_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acquire_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acquire_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acquire_acquire_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acquire_seq_cst_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acquire_seq_cst_i32_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [weak_acquire_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acquire_seq_cst_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_release_monotonic_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_release_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_release_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_release_monotonic_i32_global_cta_param_2];
-; SM70-NEXT: atom.release.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_release_acquire_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_release_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_release_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_release_acquire_i32_global_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_release_seq_cst_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_release_seq_cst_i32_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [weak_release_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_release_seq_cst_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_monotonic_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_monotonic_i32_global_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_seq_cst_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_seq_cst_i32_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_seq_cst_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_seq_cst_monotonic_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_seq_cst_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [weak_seq_cst_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_seq_cst_monotonic_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_seq_cst_acquire_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_seq_cst_acquire_i32_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [weak_seq_cst_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_seq_cst_acquire_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_seq_cst_seq_cst_i32_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_seq_cst_seq_cst_i32_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [weak_seq_cst_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_seq_cst_seq_cst_i32_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i64 @weak_monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_monotonic_monotonic_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_monotonic_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_monotonic_monotonic_i64_global_cta_param_2];
-; SM70-NEXT: atom.relaxed.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_monotonic_acquire_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_monotonic_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_monotonic_acquire_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_monotonic_seq_cst_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_monotonic_seq_cst_i64_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_monotonic_seq_cst_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_acquire_monotonic_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acquire_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_acquire_monotonic_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_acquire_acquire_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acquire_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_acquire_acquire_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_acquire_seq_cst_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acquire_seq_cst_i64_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_acquire_seq_cst_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_release_monotonic_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_release_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_release_monotonic_i64_global_cta_param_2];
-; SM70-NEXT: atom.release.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_release_acquire_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_release_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_release_acquire_i64_global_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_release_seq_cst_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_release_seq_cst_i64_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_release_seq_cst_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_acq_rel_monotonic_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_acq_rel_monotonic_i64_global_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_acq_rel_acquire_i64_global_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_acq_rel_seq_cst_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_seq_cst_i64_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_acq_rel_seq_cst_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_seq_cst_monotonic_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_seq_cst_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_seq_cst_monotonic_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_seq_cst_acquire_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_seq_cst_acquire_i64_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_seq_cst_acquire_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: weak_seq_cst_seq_cst_i64_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .b64 %rd<5>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_seq_cst_seq_cst_i64_global_cta_param_0];
-; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [weak_seq_cst_seq_cst_i64_global_cta_param_2];
-; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i8_global(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_global_param_0];
-; SM70-NEXT: fence.acq_rel.sys;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_global_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_global_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.sys;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i32_global(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_param_2];
-; SM70-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i32_global_sys(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_sys_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_sys_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_sys_param_2];
-; SM70-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i32_global_gpu(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_gpu_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_gpu_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_gpu_param_2];
-; SM70-NEXT: atom.acq_rel.gpu.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i8_generic_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<17>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_generic_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_generic_cta_param_1];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_generic_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM70-NEXT: and.b32 %r9, %r8, 3;
-; SM70-NEXT: shl.b32 %r1, %r9, 3;
-; SM70-NEXT: mov.b32 %r10, 255;
-; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i8_shared_cta(
+define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: monotonic_monotonic_i8_global_cta(
; SM70: {
+; SM70-NEXT: .reg .pred %p<3>;
+; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_shared_cta_param_0];
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_shared_cta_param_1];
+; SM70-NEXT: ld.param.b8 %rs1, [monotonic_monotonic_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i8_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_shared_cta_param_2];
+; SM70-NEXT: ld.param.b8 %r7, [monotonic_monotonic_i8_global_cta_param_1];
; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
; SM70-NEXT: and.b32 %r9, %r8, 3;
; SM70-NEXT: shl.b32 %r1, %r9, 3;
; SM70-NEXT: mov.b32 %r10, 255;
; SM70-NEXT: shl.b32 %r11, %r10, %r1;
-; SM70-NEXT: not.b32 %r12, %r11;
-; SM70-NEXT: shl.b32 %r2, %r7, %r1;
-; SM70-NEXT: shl.b32 %r3, %r6, %r1;
-; SM70-NEXT: ld.shared.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r4, %r13, %r12;
-; SM70-NEXT: or.b32 %r14, %r4, %r2;
-; SM70-NEXT: or.b32 %r15, %r4, %r3;
-; SM70-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i32_generic_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_generic_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_generic_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_generic_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: weak_acq_rel_acquire_i32_shared_cta(
-; SM70: {
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .b64 %rd<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_shared_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_shared_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_shared_cta_param_2];
-; SM70-NEXT: atom.acq_rel.cta.shared.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
-; SM70-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i8 @strong_monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_monotonic_monotonic_i8_global_cta(
-; SM70: {
-; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
-; SM70-NEXT: .reg .b64 %rd<3>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i8_global_cta_param_0];
-; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [strong_monotonic_monotonic_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r8, [strong_monotonic_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB68_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB68_3;
+; SM70-NEXT: @%p1 bra $L__BB0_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB68_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB68_1;
-; SM70-NEXT: $L__BB68_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB0_1;
+; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
+ ret i8 %new
}
-define i8 @strong_monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_monotonic_acquire_i8_global_cta(
+define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: monotonic_acquire_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [monotonic_acquire_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i8_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [strong_monotonic_acquire_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r8, [strong_monotonic_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: ld.param.b8 %r7, [monotonic_acquire_i8_global_cta_param_1];
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB69_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB69_3;
+; SM70-NEXT: @%p1 bra $L__BB1_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB69_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB69_1;
-; SM70-NEXT: $L__BB69_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB1_1;
+; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
+ ret i8 %new
}
-define i8 @strong_monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_monotonic_seq_cst_i8_global_cta(
+define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: monotonic_seq_cst_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [monotonic_seq_cst_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i8_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_monotonic_seq_cst_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [monotonic_seq_cst_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_monotonic_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB70_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB70_3;
+; SM70-NEXT: @%p1 bra $L__BB2_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB70_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB70_1;
-; SM70-NEXT: $L__BB70_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB2_1;
+; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
+ ret i8 %new
}
-define i8 @strong_acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acquire_monotonic_i8_global_cta(
+define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acquire_monotonic_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acquire_monotonic_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i8_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acquire_monotonic_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r8, [strong_acquire_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: ld.param.b8 %r7, [acquire_monotonic_i8_global_cta_param_1];
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB71_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB71_3;
+; SM70-NEXT: @%p1 bra $L__BB3_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB71_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB71_1;
-; SM70-NEXT: $L__BB71_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB3_1;
+; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
+ ret i8 %new
}
-define i8 @strong_acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acquire_acquire_i8_global_cta(
+define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acquire_acquire_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acquire_acquire_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_acquire_i8_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acquire_acquire_i8_global_cta_param_1];
-; SM70-NEXT: ld.param.b8 %r8, [strong_acquire_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: ld.param.b8 %r7, [acquire_acquire_i8_global_cta_param_1];
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB72_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB72_3;
+; SM70-NEXT: @%p1 bra $L__BB4_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB72_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB72_1;
-; SM70-NEXT: $L__BB72_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB4_1;
+; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
+ ret i8 %new
}
-define i8 @strong_acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acquire_seq_cst_i8_global_cta(
+define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acquire_seq_cst_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acquire_seq_cst_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i8_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acquire_seq_cst_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [acquire_seq_cst_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_acquire_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB73_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB5_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB73_3;
+; SM70-NEXT: @%p1 bra $L__BB5_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB73_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB73_1;
-; SM70-NEXT: $L__BB73_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB5_1;
+; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
+ ret i8 %new
}
-define i8 @strong_release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_release_monotonic_i8_global_cta(
+define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: release_monotonic_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [release_monotonic_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [release_monotonic_i8_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_release_monotonic_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [release_monotonic_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_release_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB74_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB6_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB74_3;
+; SM70-NEXT: @%p1 bra $L__BB6_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB74_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB74_1;
-; SM70-NEXT: $L__BB74_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB6_1;
+; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
+ ret i8 %new
}
-define i8 @strong_release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_release_acquire_i8_global_cta(
+define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: release_acquire_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [release_acquire_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [release_acquire_i8_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_release_acquire_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [release_acquire_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_release_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB75_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB7_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB75_3;
+; SM70-NEXT: @%p1 bra $L__BB7_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB75_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB75_1;
-; SM70-NEXT: $L__BB75_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB7_1;
+; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
+ ret i8 %new
}
-define i8 @strong_release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_release_seq_cst_i8_global_cta(
+define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: release_seq_cst_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [release_seq_cst_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [release_seq_cst_i8_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_release_seq_cst_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [release_seq_cst_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_release_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB76_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB8_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB76_3;
+; SM70-NEXT: @%p1 bra $L__BB8_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB76_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB76_1;
-; SM70-NEXT: $L__BB76_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB8_1;
+; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
+ ret i8 %new
}
-define i8 @strong_acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acq_rel_monotonic_i8_global_cta(
+define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acq_rel_monotonic_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acq_rel_monotonic_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i8_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acq_rel_monotonic_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [acq_rel_monotonic_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_acq_rel_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB77_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB9_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB77_3;
+; SM70-NEXT: @%p1 bra $L__BB9_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB77_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB77_1;
-; SM70-NEXT: $L__BB77_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB9_1;
+; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
+ ret i8 %new
}
-define i8 @strong_acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i8_global_cta(
+define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acq_rel_acquire_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB78_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB10_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB78_3;
+; SM70-NEXT: @%p1 bra $L__BB10_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB78_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB78_1;
-; SM70-NEXT: $L__BB78_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB10_1;
+; SM70-NEXT: $L__BB10_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i8 @strong_acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acq_rel_seq_cst_i8_global_cta(
+define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acq_rel_seq_cst_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acq_rel_seq_cst_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i8_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acq_rel_seq_cst_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [acq_rel_seq_cst_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_acq_rel_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB79_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB11_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB79_3;
+; SM70-NEXT: @%p1 bra $L__BB11_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB79_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB79_1;
-; SM70-NEXT: $L__BB79_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB11_1;
+; SM70-NEXT: $L__BB11_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
+ ret i8 %new
}
-define i8 @strong_seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_seq_cst_monotonic_i8_global_cta(
+define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: seq_cst_monotonic_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [seq_cst_monotonic_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i8_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_seq_cst_monotonic_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [seq_cst_monotonic_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_seq_cst_monotonic_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB80_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB12_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB80_3;
+; SM70-NEXT: @%p1 bra $L__BB12_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB80_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB80_1;
-; SM70-NEXT: $L__BB80_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB12_1;
+; SM70-NEXT: $L__BB12_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
+ ret i8 %new
}
-define i8 @strong_seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_seq_cst_acquire_i8_global_cta(
+define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: seq_cst_acquire_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [seq_cst_acquire_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i8_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_seq_cst_acquire_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [seq_cst_acquire_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_seq_cst_acquire_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB81_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB13_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB81_3;
+; SM70-NEXT: @%p1 bra $L__BB13_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB81_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB81_1;
-; SM70-NEXT: $L__BB81_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB13_1;
+; SM70-NEXT: $L__BB13_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
+ ret i8 %new
}
-define i8 @strong_seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_seq_cst_seq_cst_i8_global_cta(
+define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: seq_cst_seq_cst_i8_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i8_global_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [seq_cst_seq_cst_i8_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i8_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_seq_cst_seq_cst_i8_global_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [seq_cst_seq_cst_i8_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_seq_cst_seq_cst_i8_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB82_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB14_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB82_3;
+; SM70-NEXT: @%p1 bra $L__BB14_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB82_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB82_1;
-; SM70-NEXT: $L__BB82_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB14_1;
+; SM70-NEXT: $L__BB14_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
+ ret i8 %new
}
-define i16 @strong_monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_monotonic_monotonic_i16_global_cta(
+define i16 @monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: monotonic_monotonic_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [monotonic_monotonic_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i16_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [strong_monotonic_monotonic_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r8, [strong_monotonic_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: ld.param.b16 %r7, [monotonic_monotonic_i16_global_cta_param_1];
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB83_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB15_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB83_3;
+; SM70-NEXT: @%p1 bra $L__BB15_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB83_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB83_1;
-; SM70-NEXT: $L__BB83_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB15_1;
+; SM70-NEXT: $L__BB15_3: // %partword.cmpxchg.end
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
+ ret i16 %new
}
-define i16 @strong_monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_monotonic_acquire_i16_global_cta(
+define i16 @monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: monotonic_acquire_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [monotonic_acquire_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i16_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [strong_monotonic_acquire_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r8, [strong_monotonic_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: ld.param.b16 %r7, [monotonic_acquire_i16_global_cta_param_1];
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB84_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB16_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB84_3;
+; SM70-NEXT: @%p1 bra $L__BB16_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB84_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB84_1;
-; SM70-NEXT: $L__BB84_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB16_1;
+; SM70-NEXT: $L__BB16_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
+ ret i16 %new
}
-define i16 @strong_monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_monotonic_seq_cst_i16_global_cta(
+define i16 @monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: monotonic_seq_cst_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [monotonic_seq_cst_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i16_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_monotonic_seq_cst_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [monotonic_seq_cst_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_monotonic_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB85_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB17_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB85_3;
+; SM70-NEXT: @%p1 bra $L__BB17_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB85_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB85_1;
-; SM70-NEXT: $L__BB85_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB17_1;
+; SM70-NEXT: $L__BB17_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
+ ret i16 %new
}
-define i16 @strong_acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_acquire_monotonic_i16_global_cta(
+define i16 @acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: acquire_monotonic_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [acquire_monotonic_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i16_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [strong_acquire_monotonic_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r8, [strong_acquire_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: ld.param.b16 %r7, [acquire_monotonic_i16_global_cta_param_1];
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB86_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB18_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB86_3;
+; SM70-NEXT: @%p1 bra $L__BB18_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB86_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB86_1;
-; SM70-NEXT: $L__BB86_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB18_1;
+; SM70-NEXT: $L__BB18_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
+ ret i16 %new
}
-define i16 @strong_acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_acquire_acquire_i16_global_cta(
+define i16 @acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: acquire_acquire_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [acquire_acquire_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_acquire_i16_global_cta_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r7, [strong_acquire_acquire_i16_global_cta_param_1];
-; SM70-NEXT: ld.param.b16 %r8, [strong_acquire_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: ld.param.b16 %r7, [acquire_acquire_i16_global_cta_param_1];
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB87_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB19_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB87_3;
+; SM70-NEXT: @%p1 bra $L__BB19_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB87_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB87_1;
-; SM70-NEXT: $L__BB87_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB19_1;
+; SM70-NEXT: $L__BB19_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
+ ret i16 %new
}
-define i16 @strong_acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_acquire_seq_cst_i16_global_cta(
+define i16 @acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: acquire_seq_cst_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [acquire_seq_cst_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i16_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_acquire_seq_cst_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [acquire_seq_cst_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_acquire_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB88_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB20_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB88_3;
+; SM70-NEXT: @%p1 bra $L__BB20_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB88_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB88_1;
-; SM70-NEXT: $L__BB88_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB20_1;
+; SM70-NEXT: $L__BB20_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
+ ret i16 %new
}
-define i16 @strong_release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_release_monotonic_i16_global_cta(
+define i16 @release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: release_monotonic_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [release_monotonic_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [release_monotonic_i16_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_release_monotonic_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [release_monotonic_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_release_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB89_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB21_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB89_3;
+; SM70-NEXT: @%p1 bra $L__BB21_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB89_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB89_1;
-; SM70-NEXT: $L__BB89_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB21_1;
+; SM70-NEXT: $L__BB21_3: // %partword.cmpxchg.end
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
+ ret i16 %new
}
-define i16 @strong_release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_release_acquire_i16_global_cta(
+define i16 @release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: release_acquire_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [release_acquire_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [release_acquire_i16_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_release_acquire_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [release_acquire_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_release_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB90_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB22_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB90_3;
+; SM70-NEXT: @%p1 bra $L__BB22_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB90_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB90_1;
-; SM70-NEXT: $L__BB90_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB22_1;
+; SM70-NEXT: $L__BB22_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
+ ret i16 %new
}
-define i16 @strong_release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_release_seq_cst_i16_global_cta(
+define i16 @release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: release_seq_cst_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [release_seq_cst_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [release_seq_cst_i16_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_release_seq_cst_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [release_seq_cst_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_release_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB91_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB23_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB91_3;
+; SM70-NEXT: @%p1 bra $L__BB23_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB91_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB91_1;
-; SM70-NEXT: $L__BB91_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB23_1;
+; SM70-NEXT: $L__BB23_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
+ ret i16 %new
}
-define i16 @strong_acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_acq_rel_monotonic_i16_global_cta(
+define i16 @acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: acq_rel_monotonic_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [acq_rel_monotonic_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i16_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_acq_rel_monotonic_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [acq_rel_monotonic_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_acq_rel_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB92_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB24_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB92_3;
+; SM70-NEXT: @%p1 bra $L__BB24_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB92_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB92_1;
-; SM70-NEXT: $L__BB92_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB24_1;
+; SM70-NEXT: $L__BB24_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
+ ret i16 %new
}
-define i16 @strong_acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i16_global_cta(
+define i16 @acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: acq_rel_acquire_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [acq_rel_acquire_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i16_global_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_acq_rel_acquire_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [acq_rel_acquire_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_acq_rel_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB93_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB25_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB93_3;
+; SM70-NEXT: @%p1 bra $L__BB25_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB93_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB93_1;
-; SM70-NEXT: $L__BB93_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB25_1;
+; SM70-NEXT: $L__BB25_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
+ ret i16 %new
}
-define i16 @strong_acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_acq_rel_seq_cst_i16_global_cta(
+define i16 @acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: acq_rel_seq_cst_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [acq_rel_seq_cst_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i16_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_acq_rel_seq_cst_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [acq_rel_seq_cst_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_acq_rel_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB94_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB26_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB94_3;
+; SM70-NEXT: @%p1 bra $L__BB26_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB94_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB94_1;
-; SM70-NEXT: $L__BB94_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB26_1;
+; SM70-NEXT: $L__BB26_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
+ ret i16 %new
}
-define i16 @strong_seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_seq_cst_monotonic_i16_global_cta(
+define i16 @seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: seq_cst_monotonic_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [seq_cst_monotonic_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i16_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_seq_cst_monotonic_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [seq_cst_monotonic_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_seq_cst_monotonic_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB95_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB27_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB95_3;
+; SM70-NEXT: @%p1 bra $L__BB27_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB95_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB95_1;
-; SM70-NEXT: $L__BB95_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB27_1;
+; SM70-NEXT: $L__BB27_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
+ ret i16 %new
}
-define i16 @strong_seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_seq_cst_acquire_i16_global_cta(
+define i16 @seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: seq_cst_acquire_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [seq_cst_acquire_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i16_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_seq_cst_acquire_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [seq_cst_acquire_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_seq_cst_acquire_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB96_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB28_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB96_3;
+; SM70-NEXT: @%p1 bra $L__BB28_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB96_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB96_1;
-; SM70-NEXT: $L__BB96_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB28_1;
+; SM70-NEXT: $L__BB28_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
+ ret i16 %new
}
-define i16 @strong_seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM70-LABEL: strong_seq_cst_seq_cst_i16_global_cta(
+define i16 @seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM70-LABEL: seq_cst_seq_cst_i16_global_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i16_global_cta_param_0];
+; SM70-NEXT: ld.param.b16 %rs1, [seq_cst_seq_cst_i16_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i16_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b16 %r7, [strong_seq_cst_seq_cst_i16_global_cta_param_1];
+; SM70-NEXT: ld.param.b16 %r7, [seq_cst_seq_cst_i16_global_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b16 %r8, [strong_seq_cst_seq_cst_i16_global_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 65535;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 65535;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB97_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB29_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB97_3;
+; SM70-NEXT: @%p1 bra $L__BB29_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB97_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB97_1;
-; SM70-NEXT: $L__BB97_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB29_1;
+; SM70-NEXT: $L__BB29_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
+ ret i16 %new
}
-define i32 @strong_monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_monotonic_monotonic_i32_global_cta(
+define i32 @monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: monotonic_monotonic_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_monotonic_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_monotonic_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_monotonic_monotonic_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [monotonic_monotonic_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [monotonic_monotonic_i32_global_cta_param_2];
; SM70-NEXT: atom.relaxed.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
+ ret i32 %new
}
-define i32 @strong_monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_monotonic_acquire_i32_global_cta(
+define i32 @monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: monotonic_acquire_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_monotonic_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_monotonic_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_monotonic_acquire_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [monotonic_acquire_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [monotonic_acquire_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
+ ret i32 %new
}
-define i32 @strong_monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_monotonic_seq_cst_i32_global_cta(
+define i32 @monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: monotonic_seq_cst_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_monotonic_seq_cst_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i32_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [strong_monotonic_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_monotonic_seq_cst_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b32 %r1, [monotonic_seq_cst_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [monotonic_seq_cst_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
+ ret i32 %new
}
-define i32 @strong_acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acquire_monotonic_i32_global_cta(
+define i32 @acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acquire_monotonic_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acquire_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acquire_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acquire_monotonic_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acquire_monotonic_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acquire_monotonic_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
+ ret i32 %new
}
-define i32 @strong_acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acquire_acquire_i32_global_cta(
+define i32 @acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acquire_acquire_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acquire_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acquire_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acquire_acquire_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acquire_acquire_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acquire_acquire_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acquire_acquire_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
+ ret i32 %new
}
-define i32 @strong_acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acquire_seq_cst_i32_global_cta(
+define i32 @acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acquire_seq_cst_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acquire_seq_cst_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i32_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [strong_acquire_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acquire_seq_cst_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b32 %r1, [acquire_seq_cst_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acquire_seq_cst_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
+ ret i32 %new
}
-define i32 @strong_release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_release_monotonic_i32_global_cta(
+define i32 @release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: release_monotonic_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_release_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_release_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_release_monotonic_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [release_monotonic_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [release_monotonic_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [release_monotonic_i32_global_cta_param_2];
; SM70-NEXT: atom.release.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
+ ret i32 %new
}
-define i32 @strong_release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_release_acquire_i32_global_cta(
+define i32 @release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: release_acquire_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_release_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_release_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_release_acquire_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [release_acquire_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [release_acquire_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [release_acquire_i32_global_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
+ ret i32 %new
}
-define i32 @strong_release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_release_seq_cst_i32_global_cta(
+define i32 @release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: release_seq_cst_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_release_seq_cst_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [release_seq_cst_i32_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [strong_release_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_release_seq_cst_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b32 %r1, [release_seq_cst_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [release_seq_cst_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
+ ret i32 %new
}
-define i32 @strong_acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_monotonic_i32_global_cta(
+define i32 @acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_monotonic_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_monotonic_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_monotonic_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_monotonic_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_monotonic_i32_global_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i32_global_cta(
+define i32 @acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_acquire_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_seq_cst_i32_global_cta(
+define i32 @acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_seq_cst_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_seq_cst_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i32_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_seq_cst_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_seq_cst_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_seq_cst_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
+ ret i32 %new
}
-define i32 @strong_seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_seq_cst_monotonic_i32_global_cta(
+define i32 @seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: seq_cst_monotonic_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_seq_cst_monotonic_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i32_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [strong_seq_cst_monotonic_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_seq_cst_monotonic_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b32 %r1, [seq_cst_monotonic_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [seq_cst_monotonic_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
+ ret i32 %new
}
-define i32 @strong_seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_seq_cst_acquire_i32_global_cta(
+define i32 @seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: seq_cst_acquire_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_seq_cst_acquire_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i32_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [strong_seq_cst_acquire_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_seq_cst_acquire_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b32 %r1, [seq_cst_acquire_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [seq_cst_acquire_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
+ ret i32 %new
}
-define i32 @strong_seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_seq_cst_seq_cst_i32_global_cta(
+define i32 @seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: seq_cst_seq_cst_i32_global_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_seq_cst_seq_cst_i32_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i32_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b32 %r1, [strong_seq_cst_seq_cst_i32_global_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_seq_cst_seq_cst_i32_global_cta_param_2];
+; SM70-NEXT: ld.param.b32 %r1, [seq_cst_seq_cst_i32_global_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [seq_cst_seq_cst_i32_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
+ ret i32 %new
}
-define i64 @strong_monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_monotonic_monotonic_i64_global_cta(
+define i64 @monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: monotonic_monotonic_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_monotonic_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_monotonic_monotonic_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [monotonic_monotonic_i64_global_cta_param_2];
; SM70-NEXT: atom.relaxed.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
+ ret i64 %new
}
-define i64 @strong_monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_monotonic_acquire_i64_global_cta(
+define i64 @monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: monotonic_acquire_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_monotonic_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_monotonic_acquire_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [monotonic_acquire_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
+ ret i64 %new
}
-define i64 @strong_monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_monotonic_seq_cst_i64_global_cta(
+define i64 @monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: monotonic_seq_cst_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_monotonic_seq_cst_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i64_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_monotonic_seq_cst_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [monotonic_seq_cst_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
+ ret i64 %new
}
-define i64 @strong_acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_acquire_monotonic_i64_global_cta(
+define i64 @acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: acquire_monotonic_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acquire_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_acquire_monotonic_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [acquire_monotonic_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
+ ret i64 %new
}
-define i64 @strong_acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_acquire_acquire_i64_global_cta(
+define i64 @acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: acquire_acquire_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acquire_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_acquire_acquire_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acquire_acquire_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_acquire_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [acquire_acquire_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
+ ret i64 %new
}
-define i64 @strong_acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_acquire_seq_cst_i64_global_cta(
+define i64 @acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: acquire_seq_cst_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acquire_seq_cst_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i64_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_acquire_seq_cst_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [acquire_seq_cst_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
+ ret i64 %new
}
-define i64 @strong_release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_release_monotonic_i64_global_cta(
+define i64 @release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: release_monotonic_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_release_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_release_monotonic_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [release_monotonic_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [release_monotonic_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [release_monotonic_i64_global_cta_param_2];
; SM70-NEXT: atom.release.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
+ ret i64 %new
}
-define i64 @strong_release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_release_acquire_i64_global_cta(
+define i64 @release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: release_acquire_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_release_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_release_acquire_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [release_acquire_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [release_acquire_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [release_acquire_i64_global_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
+ ret i64 %new
}
-define i64 @strong_release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_release_seq_cst_i64_global_cta(
+define i64 @release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: release_seq_cst_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_release_seq_cst_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [release_seq_cst_i64_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_release_seq_cst_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [release_seq_cst_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [release_seq_cst_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
+ ret i64 %new
}
-define i64 @strong_acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_acq_rel_monotonic_i64_global_cta(
+define i64 @acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: acq_rel_monotonic_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_monotonic_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_acq_rel_monotonic_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [acq_rel_monotonic_i64_global_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
+ ret i64 %new
}
-define i64 @strong_acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i64_global_cta(
+define i64 @acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: acq_rel_acquire_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i64_global_cta_param_0];
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_acq_rel_acquire_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [acq_rel_acquire_i64_global_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
+ ret i64 %new
}
-define i64 @strong_acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_acq_rel_seq_cst_i64_global_cta(
+define i64 @acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: acq_rel_seq_cst_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_seq_cst_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i64_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_acq_rel_seq_cst_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [acq_rel_seq_cst_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
+ ret i64 %new
}
-define i64 @strong_seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_seq_cst_monotonic_i64_global_cta(
+define i64 @seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: seq_cst_monotonic_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_seq_cst_monotonic_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i64_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_seq_cst_monotonic_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [seq_cst_monotonic_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
+ ret i64 %new
}
-define i64 @strong_seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_seq_cst_acquire_i64_global_cta(
+define i64 @seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: seq_cst_acquire_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_seq_cst_acquire_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i64_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_seq_cst_acquire_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [seq_cst_acquire_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
+ ret i64 %new
}
-define i64 @strong_seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM70-LABEL: strong_seq_cst_seq_cst_i64_global_cta(
+define i64 @seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM70-LABEL: seq_cst_seq_cst_i64_global_cta(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_seq_cst_seq_cst_i64_global_cta_param_0];
+; SM70-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i64_global_cta_param_0];
; SM70-NEXT: fence.sc.cta;
-; SM70-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i64_global_cta_param_1];
-; SM70-NEXT: ld.param.b64 %rd3, [strong_seq_cst_seq_cst_i64_global_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i64_global_cta_param_1];
+; SM70-NEXT: ld.param.b64 %rd3, [seq_cst_seq_cst_i64_global_cta_param_2];
; SM70-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM70-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
+ ret i64 %new
}
-define i8 @strong_acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i8_global(
+define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acq_rel_acquire_i8_global(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_global_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_param_0];
; SM70-NEXT: fence.acq_rel.sys;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_global_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_global_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_global_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.global.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB128_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB60_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB128_3;
+; SM70-NEXT: @%p1 bra $L__BB60_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB128_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB128_1;
-; SM70-NEXT: $L__BB128_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB60_1;
+; SM70-NEXT: $L__BB60_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
+ ret i8 %new
}
-define i32 @strong_acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i32_global(
+define i32 @acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_acquire_i32_global(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_param_2];
; SM70-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i32_global_sys(
+define i32 @acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_acquire_i32_global_sys(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_sys_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_sys_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_sys_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_sys_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_sys_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_sys_param_2];
; SM70-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i32_global_gpu(
+define i32 @acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_acquire_i32_global_gpu(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_gpu_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_gpu_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_gpu_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_gpu_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_gpu_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_gpu_param_2];
; SM70-NEXT: atom.acq_rel.gpu.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
+ ret i32 %new
}
-define i8 @strong_acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i8_generic_cta(
+define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acq_rel_acquire_i8_generic_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_generic_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_generic_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_generic_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_generic_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_generic_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_generic_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB132_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB64_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB132_3;
+; SM70-NEXT: @%p1 bra $L__BB64_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB132_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB132_1;
-; SM70-NEXT: $L__BB132_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB64_1;
+; SM70-NEXT: $L__BB64_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i8 @strong_acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i8_shared_cta(
+define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
+; SM70-LABEL: acq_rel_acquire_i8_shared_cta(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
-; SM70-NEXT: .reg .b32 %r<18>;
+; SM70-NEXT: .reg .b16 %rs<2>;
+; SM70-NEXT: .reg .b32 %r<17>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_shared_cta_param_0];
+; SM70-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_shared_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_shared_cta_param_0];
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_shared_cta_param_1];
+; SM70-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_shared_cta_param_1];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
-; SM70-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_shared_cta_param_2];
-; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM70-NEXT: and.b32 %r10, %r9, 3;
-; SM70-NEXT: shl.b32 %r1, %r10, 3;
-; SM70-NEXT: mov.b32 %r11, 255;
-; SM70-NEXT: shl.b32 %r12, %r11, %r1;
-; SM70-NEXT: not.b32 %r2, %r12;
-; SM70-NEXT: shl.b32 %r3, %r8, %r1;
+; SM70-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM70-NEXT: and.b32 %r9, %r8, 3;
+; SM70-NEXT: shl.b32 %r1, %r9, 3;
+; SM70-NEXT: mov.b32 %r10, 255;
+; SM70-NEXT: shl.b32 %r11, %r10, %r1;
+; SM70-NEXT: not.b32 %r2, %r11;
+; SM70-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM70-NEXT: shl.b32 %r3, %r12, %r1;
; SM70-NEXT: shl.b32 %r4, %r7, %r1;
; SM70-NEXT: ld.shared.b32 %r13, [%rd1];
-; SM70-NEXT: and.b32 %r17, %r13, %r2;
-; SM70-NEXT: $L__BB133_1: // %partword.cmpxchg.loop
+; SM70-NEXT: and.b32 %r16, %r13, %r2;
+; SM70-NEXT: $L__BB65_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
-; SM70-NEXT: or.b32 %r14, %r17, %r3;
-; SM70-NEXT: or.b32 %r15, %r17, %r4;
+; SM70-NEXT: or.b32 %r14, %r16, %r3;
+; SM70-NEXT: or.b32 %r15, %r16, %r4;
; SM70-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
; SM70-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM70-NEXT: @%p1 bra $L__BB133_3;
+; SM70-NEXT: @%p1 bra $L__BB65_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM70-NEXT: // in Loop: Header=BB133_1 Depth=1
+; SM70-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM70-NEXT: and.b32 %r6, %r5, %r2;
-; SM70-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM70-NEXT: mov.b32 %r17, %r6;
-; SM70-NEXT: @%p2 bra $L__BB133_1;
-; SM70-NEXT: $L__BB133_3: // %partword.cmpxchg.end
-; SM70-NEXT: shr.u32 %r16, %r5, %r1;
+; SM70-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM70-NEXT: mov.b32 %r16, %r6;
+; SM70-NEXT: @%p2 bra $L__BB65_1;
+; SM70-NEXT: $L__BB65_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.cta;
-; SM70-NEXT: st.param.b32 [func_retval0], %r16;
+; SM70-NEXT: st.param.b32 [func_retval0], %r12;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i32 @strong_acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i32_generic_cta(
+define i32 @acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_acquire_i32_generic_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_generic_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_generic_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_generic_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_generic_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_generic_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_generic_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
-; SM70-LABEL: strong_acq_rel_acquire_i32_shared_cta(
+define i32 @acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
+; SM70-LABEL: acq_rel_acquire_i32_shared_cta(
; SM70: {
; SM70-NEXT: .reg .b32 %r<4>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_shared_cta_param_0];
-; SM70-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_shared_cta_param_1];
-; SM70-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_shared_cta_param_2];
+; SM70-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_shared_cta_param_0];
+; SM70-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_shared_cta_param_1];
+; SM70-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_shared_cta_param_2];
; SM70-NEXT: atom.acq_rel.cta.shared.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM70-NEXT: st.param.b32 [func_retval0], %r3;
+; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
index f206697bee006..4cdedb2065e23 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
@@ -1,3914 +1,2121 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90
-; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.7 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify -arch=sm_90 %}
-define i8 @weak_monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_monotonic_monotonic_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i8_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r6, [weak_monotonic_monotonic_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r7, [weak_monotonic_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_monotonic_acquire_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i8_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r6, [weak_monotonic_acquire_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r7, [weak_monotonic_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_monotonic_seq_cst_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i8_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_monotonic_seq_cst_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_monotonic_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acquire_monotonic_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i8_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acquire_monotonic_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r7, [weak_acquire_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acquire_acquire_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i8_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acquire_acquire_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r7, [weak_acquire_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acquire_seq_cst_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i8_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acquire_seq_cst_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_acquire_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_release_monotonic_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i8_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_release_monotonic_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_release_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_release_acquire_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i8_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_release_acquire_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_release_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_release_seq_cst_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i8_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_release_seq_cst_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_release_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acq_rel_monotonic_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i8_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acq_rel_monotonic_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_acq_rel_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acq_rel_seq_cst_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i8_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acq_rel_seq_cst_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_acq_rel_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_seq_cst_monotonic_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i8_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_seq_cst_monotonic_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_seq_cst_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_seq_cst_acquire_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i8_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_seq_cst_acquire_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_seq_cst_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_seq_cst_seq_cst_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i8_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_seq_cst_seq_cst_i8_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_seq_cst_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i16 @weak_monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_monotonic_monotonic_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i16_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r6, [weak_monotonic_monotonic_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r7, [weak_monotonic_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_monotonic_acquire_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i16_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r6, [weak_monotonic_acquire_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r7, [weak_monotonic_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_monotonic_seq_cst_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i16_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_monotonic_seq_cst_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_monotonic_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_acquire_monotonic_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i16_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r6, [weak_acquire_monotonic_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r7, [weak_acquire_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_acquire_acquire_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i16_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r6, [weak_acquire_acquire_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r7, [weak_acquire_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_acquire_seq_cst_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i16_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_acquire_seq_cst_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_acquire_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_release_monotonic_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i16_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_release_monotonic_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_release_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_release_acquire_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i16_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_release_acquire_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_release_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_release_seq_cst_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i16_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_release_seq_cst_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_release_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_acq_rel_monotonic_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i16_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_acq_rel_monotonic_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_acq_rel_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i16_global_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_acq_rel_acquire_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_acq_rel_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_acq_rel_seq_cst_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i16_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_acq_rel_seq_cst_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_acq_rel_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_seq_cst_monotonic_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i16_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_seq_cst_monotonic_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_seq_cst_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_seq_cst_acquire_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i16_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_seq_cst_acquire_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_seq_cst_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i16 @weak_seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: weak_seq_cst_seq_cst_i16_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i16_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r6, [weak_seq_cst_seq_cst_i16_global_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [weak_seq_cst_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 65535;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
-}
-
-define i32 @weak_monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_monotonic_monotonic_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_monotonic_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_monotonic_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_monotonic_monotonic_i32_global_cta_param_2];
-; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_monotonic_acquire_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_monotonic_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_monotonic_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_monotonic_acquire_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_monotonic_seq_cst_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_monotonic_seq_cst_i32_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [weak_monotonic_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_monotonic_seq_cst_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acquire_monotonic_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acquire_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acquire_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acquire_monotonic_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acquire_acquire_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acquire_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acquire_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acquire_acquire_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acquire_seq_cst_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acquire_seq_cst_i32_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [weak_acquire_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acquire_seq_cst_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_release_monotonic_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_release_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_release_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_release_monotonic_i32_global_cta_param_2];
-; SM90-NEXT: atom.release.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_release_acquire_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_release_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_release_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_release_acquire_i32_global_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_release_seq_cst_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_release_seq_cst_i32_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [weak_release_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_release_seq_cst_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_monotonic_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_monotonic_i32_global_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_seq_cst_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_seq_cst_i32_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_seq_cst_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_seq_cst_monotonic_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_seq_cst_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [weak_seq_cst_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_seq_cst_monotonic_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_seq_cst_acquire_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_seq_cst_acquire_i32_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [weak_seq_cst_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_seq_cst_acquire_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_seq_cst_seq_cst_i32_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_seq_cst_seq_cst_i32_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [weak_seq_cst_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_seq_cst_seq_cst_i32_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i64 @weak_monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_monotonic_monotonic_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_monotonic_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_monotonic_monotonic_i64_global_cta_param_2];
-; SM90-NEXT: atom.relaxed.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_monotonic_acquire_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_monotonic_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_monotonic_acquire_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_monotonic_seq_cst_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_monotonic_seq_cst_i64_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [weak_monotonic_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_monotonic_seq_cst_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_acquire_monotonic_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acquire_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_acquire_monotonic_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_acquire_acquire_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acquire_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_acquire_acquire_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_acquire_seq_cst_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acquire_seq_cst_i64_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acquire_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_acquire_seq_cst_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_release_monotonic_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_release_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_release_monotonic_i64_global_cta_param_2];
-; SM90-NEXT: atom.release.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_release_acquire_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_release_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_release_acquire_i64_global_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_release_seq_cst_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_release_seq_cst_i64_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [weak_release_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_release_seq_cst_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_acq_rel_monotonic_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_acq_rel_monotonic_i64_global_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_acq_rel_acquire_i64_global_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_acq_rel_seq_cst_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_seq_cst_i64_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_acq_rel_seq_cst_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_seq_cst_monotonic_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_seq_cst_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_seq_cst_monotonic_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_seq_cst_acquire_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_seq_cst_acquire_i64_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_seq_cst_acquire_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i64 @weak_seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: weak_seq_cst_seq_cst_i64_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .b64 %rd<5>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_seq_cst_seq_cst_i64_global_cta_param_0];
-; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [weak_seq_cst_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [weak_seq_cst_seq_cst_i64_global_cta_param_2];
-; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i8_global(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_global_param_0];
-; SM90-NEXT: fence.release.sys;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_global_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_global_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.sys;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i32_global(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_param_2];
-; SM90-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i32_global_sys(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_sys_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_sys_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_sys_param_2];
-; SM90-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_cluster(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i32_global_cluster(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_cluster_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_cluster_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_cluster_param_2];
-; SM90-NEXT: atom.acq_rel.cluster.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("cluster") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i32_global_gpu(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_global_gpu_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_global_gpu_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_global_gpu_param_2];
-; SM90-NEXT: atom.acq_rel.gpu.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i8_generic_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<17>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_generic_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_generic_cta_param_1];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_generic_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
-; SM90-NEXT: and.b32 %r9, %r8, 3;
-; SM90-NEXT: shl.b32 %r1, %r9, 3;
-; SM90-NEXT: mov.b32 %r10, 255;
-; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i8 @weak_acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i8_shared_cta(
+define i8 @monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: monotonic_monotonic_i8_global_cta(
; SM90: {
+; SM90-NEXT: .reg .pred %p<3>;
+; SM90-NEXT: .reg .b16 %rs<2>;
; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [weak_acq_rel_acquire_i8_shared_cta_param_0];
-; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r6, [weak_acq_rel_acquire_i8_shared_cta_param_1];
+; SM90-NEXT: ld.param.b8 %rs1, [monotonic_monotonic_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i8_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [weak_acq_rel_acquire_i8_shared_cta_param_2];
+; SM90-NEXT: ld.param.b8 %r7, [monotonic_monotonic_i8_global_cta_param_1];
; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
; SM90-NEXT: and.b32 %r9, %r8, 3;
; SM90-NEXT: shl.b32 %r1, %r9, 3;
; SM90-NEXT: mov.b32 %r10, 255;
; SM90-NEXT: shl.b32 %r11, %r10, %r1;
-; SM90-NEXT: not.b32 %r12, %r11;
-; SM90-NEXT: shl.b32 %r2, %r7, %r1;
-; SM90-NEXT: shl.b32 %r3, %r6, %r1;
-; SM90-NEXT: ld.shared.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r4, %r13, %r12;
-; SM90-NEXT: or.b32 %r14, %r4, %r2;
-; SM90-NEXT: or.b32 %r15, %r4, %r3;
-; SM90-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i32_generic_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_generic_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_generic_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_generic_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i32 @weak_acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: weak_acq_rel_acquire_i32_shared_cta(
-; SM90: {
-; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .b64 %rd<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [weak_acq_rel_acquire_i32_shared_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [weak_acq_rel_acquire_i32_shared_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [weak_acq_rel_acquire_i32_shared_cta_param_2];
-; SM90-NEXT: atom.acq_rel.cta.shared.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
-; SM90-NEXT: ret;
- %pairold = cmpxchg weak ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
-}
-
-define i8 @strong_monotonic_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_monotonic_monotonic_i8_global_cta(
-; SM90: {
-; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
-; SM90-NEXT: .reg .b64 %rd<3>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i8_global_cta_param_0];
-; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [strong_monotonic_monotonic_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r8, [strong_monotonic_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB69_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB69_3;
+; SM90-NEXT: @%p1 bra $L__BB0_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB69_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB69_1;
-; SM90-NEXT: $L__BB69_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB0_1;
+; SM90-NEXT: $L__BB0_3: // %partword.cmpxchg.end
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic monotonic
+ ret i8 %new
}
-define i8 @strong_monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_monotonic_acquire_i8_global_cta(
+define i8 @monotonic_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: monotonic_acquire_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [monotonic_acquire_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i8_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [strong_monotonic_acquire_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r8, [strong_monotonic_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: ld.param.b8 %r7, [monotonic_acquire_i8_global_cta_param_1];
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB70_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB70_3;
+; SM90-NEXT: @%p1 bra $L__BB1_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB70_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB70_1;
-; SM90-NEXT: $L__BB70_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB1_1;
+; SM90-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic acquire
+ ret i8 %new
}
-define i8 @strong_monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_monotonic_seq_cst_i8_global_cta(
+define i8 @monotonic_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: monotonic_seq_cst_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [monotonic_seq_cst_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i8_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_monotonic_seq_cst_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [monotonic_seq_cst_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_monotonic_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB71_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB2_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB71_3;
+; SM90-NEXT: @%p1 bra $L__BB2_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB71_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB71_1;
-; SM90-NEXT: $L__BB71_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB2_1;
+; SM90-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") monotonic seq_cst
+ ret i8 %new
}
-define i8 @strong_acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acquire_monotonic_i8_global_cta(
+define i8 @acquire_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acquire_monotonic_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acquire_monotonic_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i8_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acquire_monotonic_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r8, [strong_acquire_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: ld.param.b8 %r7, [acquire_monotonic_i8_global_cta_param_1];
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB72_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB3_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB72_3;
+; SM90-NEXT: @%p1 bra $L__BB3_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB72_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB72_1;
-; SM90-NEXT: $L__BB72_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB3_1;
+; SM90-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire monotonic
+ ret i8 %new
}
-define i8 @strong_acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acquire_acquire_i8_global_cta(
+define i8 @acquire_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acquire_acquire_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acquire_acquire_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i8_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acquire_acquire_i8_global_cta_param_1];
-; SM90-NEXT: ld.param.b8 %r8, [strong_acquire_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: ld.param.b8 %r7, [acquire_acquire_i8_global_cta_param_1];
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB73_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB4_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB73_3;
+; SM90-NEXT: @%p1 bra $L__BB4_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB73_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB73_1;
-; SM90-NEXT: $L__BB73_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB4_1;
+; SM90-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire acquire
+ ret i8 %new
}
-define i8 @strong_acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acquire_seq_cst_i8_global_cta(
+define i8 @acquire_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acquire_seq_cst_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acquire_seq_cst_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i8_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acquire_seq_cst_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [acquire_seq_cst_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_acquire_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB74_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB5_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB74_3;
+; SM90-NEXT: @%p1 bra $L__BB5_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB74_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB74_1;
-; SM90-NEXT: $L__BB74_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB5_1;
+; SM90-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acquire seq_cst
+ ret i8 %new
}
-define i8 @strong_release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_release_monotonic_i8_global_cta(
+define i8 @release_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: release_monotonic_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [release_monotonic_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i8_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_release_monotonic_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [release_monotonic_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_release_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB75_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB6_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB75_3;
+; SM90-NEXT: @%p1 bra $L__BB6_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB75_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB75_1;
-; SM90-NEXT: $L__BB75_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB6_1;
+; SM90-NEXT: $L__BB6_3: // %partword.cmpxchg.end
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release monotonic
+ ret i8 %new
}
-define i8 @strong_release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_release_acquire_i8_global_cta(
+define i8 @release_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: release_acquire_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [release_acquire_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i8_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_release_acquire_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [release_acquire_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_release_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB76_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB7_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB76_3;
+; SM90-NEXT: @%p1 bra $L__BB7_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB76_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB76_1;
-; SM90-NEXT: $L__BB76_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB7_1;
+; SM90-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release acquire
+ ret i8 %new
}
-define i8 @strong_release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_release_seq_cst_i8_global_cta(
+define i8 @release_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: release_seq_cst_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [release_seq_cst_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i8_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_release_seq_cst_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [release_seq_cst_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_release_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB77_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB8_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB77_3;
+; SM90-NEXT: @%p1 bra $L__BB8_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB77_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB77_1;
-; SM90-NEXT: $L__BB77_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB8_1;
+; SM90-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") release seq_cst
+ ret i8 %new
}
-define i8 @strong_acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acq_rel_monotonic_i8_global_cta(
+define i8 @acq_rel_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acq_rel_monotonic_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_monotonic_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i8_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acq_rel_monotonic_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [acq_rel_monotonic_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_acq_rel_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB78_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB9_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB78_3;
+; SM90-NEXT: @%p1 bra $L__BB9_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB78_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB78_1;
-; SM90-NEXT: $L__BB78_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB9_1;
+; SM90-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel monotonic
+ ret i8 %new
}
-define i8 @strong_acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i8_global_cta(
+define i8 @acq_rel_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acq_rel_acquire_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB79_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB10_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB79_3;
+; SM90-NEXT: @%p1 bra $L__BB10_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB79_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB79_1;
-; SM90-NEXT: $L__BB79_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB10_1;
+; SM90-NEXT: $L__BB10_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i8 @strong_acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acq_rel_seq_cst_i8_global_cta(
+define i8 @acq_rel_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acq_rel_seq_cst_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_seq_cst_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i8_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acq_rel_seq_cst_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [acq_rel_seq_cst_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_acq_rel_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB80_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB11_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB80_3;
+; SM90-NEXT: @%p1 bra $L__BB11_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB80_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB80_1;
-; SM90-NEXT: $L__BB80_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB11_1;
+; SM90-NEXT: $L__BB11_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel seq_cst
+ ret i8 %new
}
-define i8 @strong_seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_seq_cst_monotonic_i8_global_cta(
+define i8 @seq_cst_monotonic_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: seq_cst_monotonic_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_monotonic_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i8_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_seq_cst_monotonic_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [seq_cst_monotonic_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_seq_cst_monotonic_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB81_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB12_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB81_3;
+; SM90-NEXT: @%p1 bra $L__BB12_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB81_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB81_1;
-; SM90-NEXT: $L__BB81_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB12_1;
+; SM90-NEXT: $L__BB12_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst monotonic
+ ret i8 %new
}
-define i8 @strong_seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_seq_cst_acquire_i8_global_cta(
+define i8 @seq_cst_acquire_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: seq_cst_acquire_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_acquire_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i8_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_seq_cst_acquire_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [seq_cst_acquire_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_seq_cst_acquire_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB82_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB13_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB82_3;
+; SM90-NEXT: @%p1 bra $L__BB13_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB82_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB82_1;
-; SM90-NEXT: $L__BB82_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB13_1;
+; SM90-NEXT: $L__BB13_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst acquire
+ ret i8 %new
}
-define i8 @strong_seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_seq_cst_seq_cst_i8_global_cta(
+define i8 @seq_cst_seq_cst_i8_global_cta(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: seq_cst_seq_cst_i8_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i8_global_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [seq_cst_seq_cst_i8_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i8_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_seq_cst_seq_cst_i8_global_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [seq_cst_seq_cst_i8_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_seq_cst_seq_cst_i8_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB83_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB14_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB83_3;
+; SM90-NEXT: @%p1 bra $L__BB14_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB83_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB83_1;
-; SM90-NEXT: $L__BB83_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB14_1;
+; SM90-NEXT: $L__BB14_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new syncscope("block") seq_cst seq_cst
+ ret i8 %new
}
-define i16 @strong_monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_monotonic_monotonic_i16_global_cta(
+define i16 @monotonic_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: monotonic_monotonic_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [monotonic_monotonic_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i16_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [strong_monotonic_monotonic_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r8, [strong_monotonic_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: ld.param.b16 %r7, [monotonic_monotonic_i16_global_cta_param_1];
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB84_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB15_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB84_3;
+; SM90-NEXT: @%p1 bra $L__BB15_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB84_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB84_1;
-; SM90-NEXT: $L__BB84_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB15_1;
+; SM90-NEXT: $L__BB15_3: // %partword.cmpxchg.end
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic monotonic
+ ret i16 %new
}
-define i16 @strong_monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_monotonic_acquire_i16_global_cta(
+define i16 @monotonic_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: monotonic_acquire_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [monotonic_acquire_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i16_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [strong_monotonic_acquire_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r8, [strong_monotonic_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: ld.param.b16 %r7, [monotonic_acquire_i16_global_cta_param_1];
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB85_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB16_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB85_3;
+; SM90-NEXT: @%p1 bra $L__BB16_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB85_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB85_1;
-; SM90-NEXT: $L__BB85_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB16_1;
+; SM90-NEXT: $L__BB16_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic acquire
+ ret i16 %new
}
-define i16 @strong_monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_monotonic_seq_cst_i16_global_cta(
+define i16 @monotonic_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: monotonic_seq_cst_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [monotonic_seq_cst_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i16_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_monotonic_seq_cst_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [monotonic_seq_cst_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_monotonic_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB86_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB17_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB86_3;
+; SM90-NEXT: @%p1 bra $L__BB17_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB86_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB86_1;
-; SM90-NEXT: $L__BB86_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB17_1;
+; SM90-NEXT: $L__BB17_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") monotonic seq_cst
+ ret i16 %new
}
-define i16 @strong_acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_acquire_monotonic_i16_global_cta(
+define i16 @acquire_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: acquire_monotonic_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [acquire_monotonic_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i16_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [strong_acquire_monotonic_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r8, [strong_acquire_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: ld.param.b16 %r7, [acquire_monotonic_i16_global_cta_param_1];
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB87_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB18_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB87_3;
+; SM90-NEXT: @%p1 bra $L__BB18_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB87_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB87_1;
-; SM90-NEXT: $L__BB87_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB18_1;
+; SM90-NEXT: $L__BB18_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire monotonic
+ ret i16 %new
}
-define i16 @strong_acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_acquire_acquire_i16_global_cta(
+define i16 @acquire_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: acquire_acquire_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [acquire_acquire_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i16_global_cta_param_0];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r7, [strong_acquire_acquire_i16_global_cta_param_1];
-; SM90-NEXT: ld.param.b16 %r8, [strong_acquire_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: ld.param.b16 %r7, [acquire_acquire_i16_global_cta_param_1];
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB88_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB19_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB88_3;
+; SM90-NEXT: @%p1 bra $L__BB19_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB88_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB88_1;
-; SM90-NEXT: $L__BB88_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB19_1;
+; SM90-NEXT: $L__BB19_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire acquire
+ ret i16 %new
}
-define i16 @strong_acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_acquire_seq_cst_i16_global_cta(
+define i16 @acquire_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: acquire_seq_cst_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [acquire_seq_cst_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i16_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_acquire_seq_cst_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [acquire_seq_cst_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_acquire_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB89_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB20_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB89_3;
+; SM90-NEXT: @%p1 bra $L__BB20_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB89_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB89_1;
-; SM90-NEXT: $L__BB89_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB20_1;
+; SM90-NEXT: $L__BB20_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acquire seq_cst
+ ret i16 %new
}
-define i16 @strong_release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_release_monotonic_i16_global_cta(
+define i16 @release_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: release_monotonic_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [release_monotonic_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i16_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_release_monotonic_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [release_monotonic_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_release_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB90_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB21_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB90_3;
+; SM90-NEXT: @%p1 bra $L__BB21_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB90_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB90_1;
-; SM90-NEXT: $L__BB90_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB21_1;
+; SM90-NEXT: $L__BB21_3: // %partword.cmpxchg.end
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release monotonic
+ ret i16 %new
}
-define i16 @strong_release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_release_acquire_i16_global_cta(
+define i16 @release_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: release_acquire_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [release_acquire_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i16_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_release_acquire_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [release_acquire_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_release_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB91_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB22_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB91_3;
+; SM90-NEXT: @%p1 bra $L__BB22_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB91_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB91_1;
-; SM90-NEXT: $L__BB91_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB22_1;
+; SM90-NEXT: $L__BB22_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release acquire
+ ret i16 %new
}
-define i16 @strong_release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_release_seq_cst_i16_global_cta(
+define i16 @release_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: release_seq_cst_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [release_seq_cst_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i16_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_release_seq_cst_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [release_seq_cst_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_release_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB92_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB23_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB92_3;
+; SM90-NEXT: @%p1 bra $L__BB23_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB92_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB92_1;
-; SM90-NEXT: $L__BB92_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB23_1;
+; SM90-NEXT: $L__BB23_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") release seq_cst
+ ret i16 %new
}
-define i16 @strong_acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_acq_rel_monotonic_i16_global_cta(
+define i16 @acq_rel_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: acq_rel_monotonic_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_monotonic_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i16_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_acq_rel_monotonic_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [acq_rel_monotonic_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_acq_rel_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB93_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB24_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB93_3;
+; SM90-NEXT: @%p1 bra $L__BB24_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB93_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB93_1;
-; SM90-NEXT: $L__BB93_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB24_1;
+; SM90-NEXT: $L__BB24_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel monotonic
+ ret i16 %new
}
-define i16 @strong_acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i16_global_cta(
+define i16 @acq_rel_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: acq_rel_acquire_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_acquire_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i16_global_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_acq_rel_acquire_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [acq_rel_acquire_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_acq_rel_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB94_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB25_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB94_3;
+; SM90-NEXT: @%p1 bra $L__BB25_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB94_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB94_1;
-; SM90-NEXT: $L__BB94_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB25_1;
+; SM90-NEXT: $L__BB25_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel acquire
+ ret i16 %new
}
-define i16 @strong_acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_acq_rel_seq_cst_i16_global_cta(
+define i16 @acq_rel_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: acq_rel_seq_cst_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [acq_rel_seq_cst_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i16_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_acq_rel_seq_cst_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [acq_rel_seq_cst_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_acq_rel_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB95_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB26_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB95_3;
+; SM90-NEXT: @%p1 bra $L__BB26_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB95_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB95_1;
-; SM90-NEXT: $L__BB95_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB26_1;
+; SM90-NEXT: $L__BB26_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") acq_rel seq_cst
+ ret i16 %new
}
-define i16 @strong_seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_seq_cst_monotonic_i16_global_cta(
+define i16 @seq_cst_monotonic_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: seq_cst_monotonic_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_monotonic_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i16_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_seq_cst_monotonic_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [seq_cst_monotonic_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_seq_cst_monotonic_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB96_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB27_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB96_3;
+; SM90-NEXT: @%p1 bra $L__BB27_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB96_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB96_1;
-; SM90-NEXT: $L__BB96_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB27_1;
+; SM90-NEXT: $L__BB27_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst monotonic
+ ret i16 %new
}
-define i16 @strong_seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_seq_cst_acquire_i16_global_cta(
+define i16 @seq_cst_acquire_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: seq_cst_acquire_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_acquire_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i16_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_seq_cst_acquire_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [seq_cst_acquire_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_seq_cst_acquire_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB97_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB28_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB97_3;
+; SM90-NEXT: @%p1 bra $L__BB28_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB97_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB97_1;
-; SM90-NEXT: $L__BB97_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB28_1;
+; SM90-NEXT: $L__BB28_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst acquire
+ ret i16 %new
}
-define i16 @strong_seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
-; SM90-LABEL: strong_seq_cst_seq_cst_i16_global_cta(
+define i16 @seq_cst_seq_cst_i16_global_cta(ptr addrspace(1) %addr, i16 %cmp, i16 %new) {
+; SM90-LABEL: seq_cst_seq_cst_i16_global_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i16_global_cta_param_0];
+; SM90-NEXT: ld.param.b16 %rs1, [seq_cst_seq_cst_i16_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i16_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b16 %r7, [strong_seq_cst_seq_cst_i16_global_cta_param_1];
+; SM90-NEXT: ld.param.b16 %r7, [seq_cst_seq_cst_i16_global_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b16 %r8, [strong_seq_cst_seq_cst_i16_global_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 65535;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 65535;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB98_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB29_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB98_3;
+; SM90-NEXT: @%p1 bra $L__BB29_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB98_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB98_1;
-; SM90-NEXT: $L__BB98_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB29_1;
+; SM90-NEXT: $L__BB29_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i16, i1 } %pairold, 0
- ret i16 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i16 %cmp, i16 %new syncscope("block") seq_cst seq_cst
+ ret i16 %new
}
-define i32 @strong_monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_monotonic_monotonic_i32_global_cta(
+define i32 @monotonic_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: monotonic_monotonic_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_monotonic_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_monotonic_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_monotonic_monotonic_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [monotonic_monotonic_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [monotonic_monotonic_i32_global_cta_param_2];
; SM90-NEXT: atom.relaxed.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic monotonic
+ ret i32 %new
}
-define i32 @strong_monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_monotonic_acquire_i32_global_cta(
+define i32 @monotonic_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: monotonic_acquire_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_monotonic_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_monotonic_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_monotonic_acquire_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [monotonic_acquire_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [monotonic_acquire_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic acquire
+ ret i32 %new
}
-define i32 @strong_monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_monotonic_seq_cst_i32_global_cta(
+define i32 @monotonic_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: monotonic_seq_cst_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_monotonic_seq_cst_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i32_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [strong_monotonic_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_monotonic_seq_cst_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b32 %r1, [monotonic_seq_cst_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [monotonic_seq_cst_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") monotonic seq_cst
+ ret i32 %new
}
-define i32 @strong_acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acquire_monotonic_i32_global_cta(
+define i32 @acquire_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acquire_monotonic_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acquire_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acquire_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acquire_monotonic_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acquire_monotonic_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acquire_monotonic_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire monotonic
+ ret i32 %new
}
-define i32 @strong_acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acquire_acquire_i32_global_cta(
+define i32 @acquire_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acquire_acquire_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acquire_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acquire_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acquire_acquire_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acquire_acquire_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acquire_acquire_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire acquire
+ ret i32 %new
}
-define i32 @strong_acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acquire_seq_cst_i32_global_cta(
+define i32 @acquire_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acquire_seq_cst_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acquire_seq_cst_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i32_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [strong_acquire_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acquire_seq_cst_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b32 %r1, [acquire_seq_cst_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acquire_seq_cst_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acquire seq_cst
+ ret i32 %new
}
-define i32 @strong_release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_release_monotonic_i32_global_cta(
+define i32 @release_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: release_monotonic_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_release_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_release_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_release_monotonic_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [release_monotonic_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [release_monotonic_i32_global_cta_param_2];
; SM90-NEXT: atom.release.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release monotonic
+ ret i32 %new
}
-define i32 @strong_release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_release_acquire_i32_global_cta(
+define i32 @release_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: release_acquire_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_release_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_release_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_release_acquire_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [release_acquire_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [release_acquire_i32_global_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release acquire
+ ret i32 %new
}
-define i32 @strong_release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_release_seq_cst_i32_global_cta(
+define i32 @release_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: release_seq_cst_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_release_seq_cst_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i32_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [strong_release_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_release_seq_cst_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b32 %r1, [release_seq_cst_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [release_seq_cst_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") release seq_cst
+ ret i32 %new
}
-define i32 @strong_acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_monotonic_i32_global_cta(
+define i32 @acq_rel_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_monotonic_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_monotonic_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_monotonic_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_monotonic_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_monotonic_i32_global_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel monotonic
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i32_global_cta(
+define i32 @acq_rel_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_acquire_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_seq_cst_i32_global_cta(
+define i32 @acq_rel_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_seq_cst_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_seq_cst_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i32_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_seq_cst_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_seq_cst_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_seq_cst_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel seq_cst
+ ret i32 %new
}
-define i32 @strong_seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_seq_cst_monotonic_i32_global_cta(
+define i32 @seq_cst_monotonic_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: seq_cst_monotonic_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_seq_cst_monotonic_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i32_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [strong_seq_cst_monotonic_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_seq_cst_monotonic_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b32 %r1, [seq_cst_monotonic_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [seq_cst_monotonic_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst monotonic
+ ret i32 %new
}
-define i32 @strong_seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_seq_cst_acquire_i32_global_cta(
+define i32 @seq_cst_acquire_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: seq_cst_acquire_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_seq_cst_acquire_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i32_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [strong_seq_cst_acquire_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_seq_cst_acquire_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b32 %r1, [seq_cst_acquire_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [seq_cst_acquire_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst acquire
+ ret i32 %new
}
-define i32 @strong_seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_seq_cst_seq_cst_i32_global_cta(
+define i32 @seq_cst_seq_cst_i32_global_cta(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: seq_cst_seq_cst_i32_global_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_seq_cst_seq_cst_i32_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i32_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b32 %r1, [strong_seq_cst_seq_cst_i32_global_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_seq_cst_seq_cst_i32_global_cta_param_2];
+; SM90-NEXT: ld.param.b32 %r1, [seq_cst_seq_cst_i32_global_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [seq_cst_seq_cst_i32_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("block") seq_cst seq_cst
+ ret i32 %new
}
-define i64 @strong_monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_monotonic_monotonic_i64_global_cta(
+define i64 @monotonic_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: monotonic_monotonic_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_monotonic_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_monotonic_monotonic_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [monotonic_monotonic_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_monotonic_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [monotonic_monotonic_i64_global_cta_param_2];
; SM90-NEXT: atom.relaxed.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic monotonic
+ ret i64 %new
}
-define i64 @strong_monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_monotonic_acquire_i64_global_cta(
+define i64 @monotonic_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: monotonic_acquire_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_monotonic_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_monotonic_acquire_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [monotonic_acquire_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_acquire_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [monotonic_acquire_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic acquire
+ ret i64 %new
}
-define i64 @strong_monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_monotonic_seq_cst_i64_global_cta(
+define i64 @monotonic_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: monotonic_seq_cst_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_monotonic_seq_cst_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [monotonic_seq_cst_i64_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [strong_monotonic_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_monotonic_seq_cst_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [monotonic_seq_cst_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [monotonic_seq_cst_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") monotonic seq_cst
+ ret i64 %new
}
-define i64 @strong_acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_acquire_monotonic_i64_global_cta(
+define i64 @acquire_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: acquire_monotonic_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acquire_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_acquire_monotonic_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acquire_monotonic_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_monotonic_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [acquire_monotonic_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire monotonic
+ ret i64 %new
}
-define i64 @strong_acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_acquire_acquire_i64_global_cta(
+define i64 @acquire_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: acquire_acquire_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acquire_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_acquire_acquire_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acquire_acquire_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_acquire_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [acquire_acquire_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire acquire
+ ret i64 %new
}
-define i64 @strong_acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_acquire_seq_cst_i64_global_cta(
+define i64 @acquire_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: acquire_seq_cst_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acquire_seq_cst_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [acquire_seq_cst_i64_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acquire_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_acquire_seq_cst_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acquire_seq_cst_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [acquire_seq_cst_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acquire seq_cst
+ ret i64 %new
}
-define i64 @strong_release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_release_monotonic_i64_global_cta(
+define i64 @release_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: release_monotonic_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_release_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_release_monotonic_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [release_monotonic_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [release_monotonic_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [release_monotonic_i64_global_cta_param_2];
; SM90-NEXT: atom.release.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release monotonic
+ ret i64 %new
}
-define i64 @strong_release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_release_acquire_i64_global_cta(
+define i64 @release_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: release_acquire_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_release_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_release_acquire_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [release_acquire_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [release_acquire_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [release_acquire_i64_global_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release acquire
+ ret i64 %new
}
-define i64 @strong_release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_release_seq_cst_i64_global_cta(
+define i64 @release_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: release_seq_cst_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_release_seq_cst_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [release_seq_cst_i64_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [strong_release_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_release_seq_cst_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [release_seq_cst_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [release_seq_cst_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") release seq_cst
+ ret i64 %new
}
-define i64 @strong_acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_acq_rel_monotonic_i64_global_cta(
+define i64 @acq_rel_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: acq_rel_monotonic_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_monotonic_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_acq_rel_monotonic_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_monotonic_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_monotonic_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_monotonic_i64_global_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel monotonic
+ ret i64 %new
}
-define i64 @strong_acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i64_global_cta(
+define i64 @acq_rel_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: acq_rel_acquire_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i64_global_cta_param_0];
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_acq_rel_acquire_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_acquire_i64_global_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel acquire
+ ret i64 %new
}
-define i64 @strong_acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_acq_rel_seq_cst_i64_global_cta(
+define i64 @acq_rel_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: acq_rel_seq_cst_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_seq_cst_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_seq_cst_i64_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_acq_rel_seq_cst_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_seq_cst_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [acq_rel_seq_cst_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") acq_rel seq_cst
+ ret i64 %new
}
-define i64 @strong_seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_seq_cst_monotonic_i64_global_cta(
+define i64 @seq_cst_monotonic_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: seq_cst_monotonic_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_seq_cst_monotonic_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_monotonic_i64_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_monotonic_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_seq_cst_monotonic_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_monotonic_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_monotonic_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst monotonic
+ ret i64 %new
}
-define i64 @strong_seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_seq_cst_acquire_i64_global_cta(
+define i64 @seq_cst_acquire_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: seq_cst_acquire_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_seq_cst_acquire_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_acquire_i64_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_acquire_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_seq_cst_acquire_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_acquire_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_acquire_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst acquire
+ ret i64 %new
}
-define i64 @strong_seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
-; SM90-LABEL: strong_seq_cst_seq_cst_i64_global_cta(
+define i64 @seq_cst_seq_cst_i64_global_cta(ptr addrspace(1) %addr, i64 %cmp, i64 %new) {
+; SM90-LABEL: seq_cst_seq_cst_i64_global_cta(
; SM90: {
; SM90-NEXT: .reg .b64 %rd<5>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_seq_cst_seq_cst_i64_global_cta_param_0];
+; SM90-NEXT: ld.param.b64 %rd1, [seq_cst_seq_cst_i64_global_cta_param_0];
; SM90-NEXT: fence.sc.cta;
-; SM90-NEXT: ld.param.b64 %rd2, [strong_seq_cst_seq_cst_i64_global_cta_param_1];
-; SM90-NEXT: ld.param.b64 %rd3, [strong_seq_cst_seq_cst_i64_global_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [seq_cst_seq_cst_i64_global_cta_param_1];
+; SM90-NEXT: ld.param.b64 %rd3, [seq_cst_seq_cst_i64_global_cta_param_2];
; SM90-NEXT: atom.acquire.cta.global.cas.b64 %rd4, [%rd1], %rd2, %rd3;
-; SM90-NEXT: st.param.b64 [func_retval0], %rd4;
+; SM90-NEXT: st.param.b64 [func_retval0], %rd3;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
- %oldvalue = extractvalue { i64, i1 } %pairold, 0
- ret i64 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i64 %cmp, i64 %new syncscope("block") seq_cst seq_cst
+ ret i64 %new
}
-define i8 @strong_acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i8_global(
+define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acq_rel_acquire_i8_global(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_global_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_global_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_global_param_0];
; SM90-NEXT: fence.release.sys;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_global_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_global_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_global_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.global.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB129_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB60_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.sys.global.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB129_3;
+; SM90-NEXT: @%p1 bra $L__BB60_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB129_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB129_1;
-; SM90-NEXT: $L__BB129_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB60_1;
+; SM90-NEXT: $L__BB60_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i8 %cmp, i8 %new acq_rel acquire
+ ret i8 %new
}
-define i32 @strong_acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i32_global(
+define i32 @acq_rel_acquire_i32_global(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_acquire_i32_global(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_param_2];
; SM90-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i32_global_sys(
+define i32 @acq_rel_acquire_i32_global_sys(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_acquire_i32_global_sys(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_sys_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_sys_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_sys_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_sys_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_sys_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_sys_param_2];
; SM90-NEXT: atom.acq_rel.sys.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_cluster(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i32_global_cluster(
+define i32 @acq_rel_acquire_i32_global_cluster(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_acquire_i32_global_cluster(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_cluster_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_cluster_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_cluster_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_cluster_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_cluster_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_cluster_param_2];
; SM90-NEXT: atom.acq_rel.cluster.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("cluster") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("cluster") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i32_global_gpu(
+define i32 @acq_rel_acquire_i32_global_gpu(ptr addrspace(1) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_acquire_i32_global_gpu(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_global_gpu_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_global_gpu_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_global_gpu_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_global_gpu_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_global_gpu_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_global_gpu_param_2];
; SM90-NEXT: atom.acq_rel.gpu.global.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(1) %addr, i32 %cmp, i32 %new syncscope("device") acq_rel acquire
+ ret i32 %new
}
-define i8 @strong_acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i8_generic_cta(
+define i8 @acq_rel_acquire_i8_generic_cta(ptr %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acq_rel_acquire_i8_generic_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_generic_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_generic_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_generic_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_generic_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_generic_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_generic_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB134_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB65_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB134_3;
+; SM90-NEXT: @%p1 bra $L__BB65_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB134_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB134_1;
-; SM90-NEXT: $L__BB134_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB65_1;
+; SM90-NEXT: $L__BB65_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i8 @strong_acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i8_shared_cta(
+define i8 @acq_rel_acquire_i8_shared_cta(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
+; SM90-LABEL: acq_rel_acquire_i8_shared_cta(
; SM90: {
; SM90-NEXT: .reg .pred %p<3>;
-; SM90-NEXT: .reg .b32 %r<18>;
+; SM90-NEXT: .reg .b16 %rs<2>;
+; SM90-NEXT: .reg .b32 %r<17>;
; SM90-NEXT: .reg .b64 %rd<3>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd2, [strong_acq_rel_acquire_i8_shared_cta_param_0];
+; SM90-NEXT: ld.param.b8 %rs1, [acq_rel_acquire_i8_shared_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd2, [acq_rel_acquire_i8_shared_cta_param_0];
; SM90-NEXT: fence.release.cta;
-; SM90-NEXT: ld.param.b8 %r7, [strong_acq_rel_acquire_i8_shared_cta_param_1];
+; SM90-NEXT: ld.param.b8 %r7, [acq_rel_acquire_i8_shared_cta_param_1];
; SM90-NEXT: and.b64 %rd1, %rd2, -4;
-; SM90-NEXT: ld.param.b8 %r8, [strong_acq_rel_acquire_i8_shared_cta_param_2];
-; SM90-NEXT: cvt.u32.u64 %r9, %rd2;
-; SM90-NEXT: and.b32 %r10, %r9, 3;
-; SM90-NEXT: shl.b32 %r1, %r10, 3;
-; SM90-NEXT: mov.b32 %r11, 255;
-; SM90-NEXT: shl.b32 %r12, %r11, %r1;
-; SM90-NEXT: not.b32 %r2, %r12;
-; SM90-NEXT: shl.b32 %r3, %r8, %r1;
+; SM90-NEXT: cvt.u32.u64 %r8, %rd2;
+; SM90-NEXT: and.b32 %r9, %r8, 3;
+; SM90-NEXT: shl.b32 %r1, %r9, 3;
+; SM90-NEXT: mov.b32 %r10, 255;
+; SM90-NEXT: shl.b32 %r11, %r10, %r1;
+; SM90-NEXT: not.b32 %r2, %r11;
+; SM90-NEXT: cvt.u32.u16 %r12, %rs1;
+; SM90-NEXT: shl.b32 %r3, %r12, %r1;
; SM90-NEXT: shl.b32 %r4, %r7, %r1;
; SM90-NEXT: ld.shared.b32 %r13, [%rd1];
-; SM90-NEXT: and.b32 %r17, %r13, %r2;
-; SM90-NEXT: $L__BB135_1: // %partword.cmpxchg.loop
+; SM90-NEXT: and.b32 %r16, %r13, %r2;
+; SM90-NEXT: $L__BB66_1: // %partword.cmpxchg.loop
; SM90-NEXT: // =>This Inner Loop Header: Depth=1
-; SM90-NEXT: or.b32 %r14, %r17, %r3;
-; SM90-NEXT: or.b32 %r15, %r17, %r4;
+; SM90-NEXT: or.b32 %r14, %r16, %r3;
+; SM90-NEXT: or.b32 %r15, %r16, %r4;
; SM90-NEXT: atom.relaxed.cta.shared.cas.b32 %r5, [%rd1], %r15, %r14;
; SM90-NEXT: setp.eq.b32 %p1, %r5, %r15;
-; SM90-NEXT: @%p1 bra $L__BB135_3;
+; SM90-NEXT: @%p1 bra $L__BB66_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
-; SM90-NEXT: // in Loop: Header=BB135_1 Depth=1
+; SM90-NEXT: // in Loop: Header=BB66_1 Depth=1
; SM90-NEXT: and.b32 %r6, %r5, %r2;
-; SM90-NEXT: setp.ne.b32 %p2, %r17, %r6;
-; SM90-NEXT: mov.b32 %r17, %r6;
-; SM90-NEXT: @%p2 bra $L__BB135_1;
-; SM90-NEXT: $L__BB135_3: // %partword.cmpxchg.end
-; SM90-NEXT: shr.u32 %r16, %r5, %r1;
+; SM90-NEXT: setp.ne.b32 %p2, %r16, %r6;
+; SM90-NEXT: mov.b32 %r16, %r6;
+; SM90-NEXT: @%p2 bra $L__BB66_1;
+; SM90-NEXT: $L__BB66_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.cta;
-; SM90-NEXT: st.param.b32 [func_retval0], %r16;
+; SM90-NEXT: st.param.b32 [func_retval0], %r12;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i8, i1 } %pairold, 0
- ret i8 %oldvalue
+ %pairold = cmpxchg ptr addrspace(3) %addr, i8 %cmp, i8 %new syncscope("block") acq_rel acquire
+ ret i8 %new
}
-define i32 @strong_acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i32_generic_cta(
+define i32 @acq_rel_acquire_i32_generic_cta(ptr %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_acquire_i32_generic_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_generic_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_generic_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_generic_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_generic_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_generic_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_generic_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
-define i32 @strong_acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
-; SM90-LABEL: strong_acq_rel_acquire_i32_shared_cta(
+define i32 @acq_rel_acquire_i32_shared_cta(ptr addrspace(3) %addr, i32 %cmp, i32 %new) {
+; SM90-LABEL: acq_rel_acquire_i32_shared_cta(
; SM90: {
; SM90-NEXT: .reg .b32 %r<4>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b64 %rd1, [strong_acq_rel_acquire_i32_shared_cta_param_0];
-; SM90-NEXT: ld.param.b32 %r1, [strong_acq_rel_acquire_i32_shared_cta_param_1];
-; SM90-NEXT: ld.param.b32 %r2, [strong_acq_rel_acquire_i32_shared_cta_param_2];
+; SM90-NEXT: ld.param.b64 %rd1, [acq_rel_acquire_i32_shared_cta_param_0];
+; SM90-NEXT: ld.param.b32 %r1, [acq_rel_acquire_i32_shared_cta_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [acq_rel_acquire_i32_shared_cta_param_2];
; SM90-NEXT: atom.acq_rel.cta.shared.cas.b32 %r3, [%rd1], %r1, %r2;
-; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: st.param.b32 [func_retval0], %r2;
; SM90-NEXT: ret;
- %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
- %oldvalue = extractvalue { i32, i1 } %pairold, 0
- ret i32 %oldvalue
+ %pairold = cmpxchg ptr addrspace(3) %addr, i32 %cmp, i32 %new syncscope("block") acq_rel acquire
+ ret i32 %new
}
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.py b/llvm/test/CodeGen/NVPTX/cmpxchg.py
index 074662b764bfe..75623a59ad481 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.py
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.py
@@ -5,19 +5,17 @@
from itertools import product
cmpxchg_func = Template(
- """define i$size @${strength}_${success}_${failure}_i${size}_${addrspace}_${ptx_scope}(ptr${addrspace_cast} %addr, i$size %cmp, i$size %new) {
- %pairold = cmpxchg ${weak} ptr${addrspace_cast} %addr, i$size %cmp, i$size %new syncscope(\"${llvm_scope}\") $success $failure
- %oldvalue = extractvalue { i$size, i1 } %pairold, 0
- ret i$size %oldvalue
+ """define i$size @${success}_${failure}_i${size}_${addrspace}_${ptx_scope}(ptr${addrspace_cast} %addr, i$size %cmp, i$size %new) {
+ %pairold = cmpxchg ptr${addrspace_cast} %addr, i$size %cmp, i$size %new syncscope(\"${llvm_scope}\") $success $failure
+ ret i$size %new
}
"""
)
cmpxchg_func_no_scope = Template(
- """define i$size @${strength}_${success}_${failure}_i${size}_${addrspace}(ptr${addrspace_cast} %addr, i$size %cmp, i$size %new) {
- %pairold = cmpxchg ${weak} ptr${addrspace_cast} %addr, i$size %cmp, i$size %new $success $failure
- %oldvalue = extractvalue { i$size, i1 } %pairold, 0
- ret i$size %oldvalue
+ """define i$size @${success}_${failure}_i${size}_${addrspace}(ptr${addrspace_cast} %addr, i$size %cmp, i$size %new) {
+ %pairold = cmpxchg ptr${addrspace_cast} %addr, i$size %cmp, i$size %new $success $failure
+ ret i$size %new
}
"""
)
@@ -37,7 +35,6 @@ def get_addrspace_cast(addrspace):
TESTS = [(60, 50), (70, 63), (90, 87)]
-# We don't include (100, 90) because the codegen is identical to (90, 87)
LLVM_SCOPES = ["", "block", "cluster", "device"]
@@ -47,100 +44,90 @@ def get_addrspace_cast(addrspace):
FAILURE_ORDERINGS = ["monotonic", "acquire", "seq_cst"]
-STRENGTHS = ["weak", "strong"]
-
SIZES = [8, 16, 32, 64]
ADDRSPACES = [0, 1, 3]
ADDRSPACE_NUM_TO_ADDRSPACE = {0: "generic", 1: "global", 3: "shared"}
+
if __name__ == "__main__":
for sm, ptx in TESTS:
with open("cmpxchg-sm{}.ll".format(str(sm)), "w") as fp:
print(run_statement.substitute(sm=sm, ptx=ptx), file=fp)
- # Test weak and strong cmpxchg for all slices
- for strength in STRENGTHS:
- # Our test space is: SIZES X SUCCESS_ORDERINGS X FAILURE_ORDERINGS X ADDRSPACES X LLVM_SCOPES
- # This is very large, so we instead test 3 slices.
-
- # First slice: are all orderings correctly supported, with and without emulation loops?
- # set addrspace to global, scope to cta, generate all possible orderings, for all operation sizes
- addrspace, llvm_scope = 1, "block"
- for size, success, failure in product(
- SIZES, SUCCESS_ORDERINGS, FAILURE_ORDERINGS
- ):
- print(
- cmpxchg_func.substitute(
- success=success,
- failure=failure,
- size=size,
- addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
- addrspace_cast=get_addrspace_cast(addrspace),
- llvm_scope=llvm_scope,
- ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
- strength=strength,
- weak="weak" if strength == "weak" else "",
- ),
- file=fp,
- )
-
- # Second slice: Are all scopes correctly supported, with and without emulation loops?
- # fix addrspace, ordering, generate all possible scopes, for operation sizes i8, i32
- addrspace, success, failure = 1, "acq_rel", "acquire"
- for size in [8, 32]:
- print(
- cmpxchg_func_no_scope.substitute(
- success=success,
- failure=failure,
- size=size,
- addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
- addrspace_cast=get_addrspace_cast(addrspace),
- strength=strength,
- weak="weak" if strength == "weak" else "",
- ),
- file=fp,
- )
-
- for llvm_scope in LLVM_SCOPES:
- if sm < 90 and llvm_scope == "cluster":
- continue
- if llvm_scope == "block":
- # skip (acq_rel, acquire, global, cta)
- continue
- print(
- cmpxchg_func.substitute(
- success=success,
- failure=failure,
- size=size,
- addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
- addrspace_cast=get_addrspace_cast(addrspace),
- llvm_scope=llvm_scope,
- ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
- strength=strength,
- weak="weak" if strength == "weak" else "",
- ),
- file=fp,
- )
-
- # Third slice: Are all address spaces correctly supported?
- # fix ordering, scope, generate all possible address spaces, for operation sizes i8, i32
- success, failure, llvm_scope = "acq_rel", "acquire", "block"
- for size, addrspace in product([8, 32], ADDRSPACES):
- if addrspace == 1:
- # skip (acq_rel, acquire, global, cta)
- continue
- print(
- cmpxchg_func.substitute(
- success=success,
- failure=failure,
- size=size,
- addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
- addrspace_cast=get_addrspace_cast(addrspace),
- llvm_scope=llvm_scope,
- ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
- strength=strength,
- weak="weak" if strength == "weak" else "",
- ),
- file=fp,
- )
+
+ # Our test space is: SIZES X SUCCESS_ORDERINGS X FAILURE_ORDERINGS X ADDRSPACES X LLVM_SCOPES
+ # This is very large, so we instead test 3 slices.
+
+ # First slice: are all orderings correctly supported, with and without emulation loops?
+ # set addrspace to global, scope to cta, generate all possible orderings, for all operation sizes
+ addrspace, llvm_scope = 1, "block"
+ for size, success, failure in product(
+ SIZES, SUCCESS_ORDERINGS, FAILURE_ORDERINGS
+ ):
+ print(
+ cmpxchg_func.substitute(
+ success=success,
+ failure=failure,
+ size=size,
+ addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
+ addrspace_cast=get_addrspace_cast(addrspace),
+ llvm_scope=llvm_scope,
+ ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
+ ),
+ file=fp,
+ )
+
+        # Second slice: Are all scopes correctly supported, with and without emulation loops?
+ # fix addrspace, ordering, generate all possible scopes, for operation sizes i8, i32
+ addrspace, success, failure = 1, "acq_rel", "acquire"
+ for size in [8, 32]:
+ print(
+ cmpxchg_func_no_scope.substitute(
+ success=success,
+ failure=failure,
+ size=size,
+ addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
+ addrspace_cast=get_addrspace_cast(addrspace),
+ ),
+ file=fp,
+ )
+
+ for llvm_scope in LLVM_SCOPES:
+ if sm < 90 and llvm_scope == "cluster":
+ continue
+ if llvm_scope == "block":
+ # skip (acq_rel, acquire, global, cta)
+ continue
+ print(
+ cmpxchg_func.substitute(
+ success=success,
+ failure=failure,
+ size=size,
+ addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
+ addrspace_cast=get_addrspace_cast(addrspace),
+ llvm_scope=llvm_scope,
+ ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
+ ),
+ file=fp,
+ )
+
+ # Third slice: Are all address spaces correctly supported?
+ # fix ordering, scope, generate all possible address spaces, for operation sizes i8, i32
+ success, failure, llvm_scope = "acq_rel", "acquire", "block"
+ for size, addrspace in product([8, 32], ADDRSPACES):
+ if addrspace == 1:
+ # skip (acq_rel, acquire, global, cta)
+ continue
+ print(
+ cmpxchg_func.substitute(
+ success=success,
+ failure=failure,
+ size=size,
+ addrspace=ADDRSPACE_NUM_TO_ADDRSPACE[addrspace],
+ addrspace_cast=get_addrspace_cast(addrspace),
+ llvm_scope=llvm_scope,
+ ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
+ ),
+ file=fp,
+ )
More information about the llvm-branch-commits
mailing list