[llvm] CodeGen/test: regen two tests with UTC (NFC) (PR #113338)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 30 07:11:10 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/113338
>From 7800b0b3e42026d2cc00fd33d0bbb16abd9d6232 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 22 Oct 2024 16:44:13 +0100
Subject: [PATCH] CodeGen/test: regen two tests with UTC (NFC)
---
llvm/test/CodeGen/NVPTX/load-store.ll | 2145 ++++++++++++-----
.../PowerPC/big-endian-store-forward.ll | 12 +-
2 files changed, 1586 insertions(+), 571 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index f922fd92fa244e..8435e016096621 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
@@ -22,149 +23,297 @@
; generic statespace
-; CHECK-LABEL: generic_weak
define void @generic_weak(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_weak_param_0];
+; CHECK-NEXT: ld.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_weak_param_2];
+; CHECK-NEXT: st.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_weak_param_3];
+; CHECK-NEXT: ld.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.f64 %fd1, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.f64 [%rd4], %fd2;
+; CHECK-NEXT: ld.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr %a
- ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr %b
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr %c
- ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr %d
- ; CHECK: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr %c
- ; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr %d
%f.add = fadd double %f.load, 1.
- ; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr %d
; TODO: make the lowering of this weak vector ops consistent with
; the ones of the next tests. This test lowers to a weak PTX
; vector op, but next test lowers to a vector PTX op.
- ; CHECK: ld.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr %b
; TODO: make the lowering of this weak vector ops consistent with
; the ones of the previous test. This test lowers to a weak
; PTX scalar op, but prior test lowers to a vector PTX op.
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr %c
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr %c
- ; CHECK: ld.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr %d
- ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr %d
- ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr %d
- ; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr %d
- ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr %d
- ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr %d
- ; CHECK: ld.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr %d
ret void
}
-; CHECK-LABEL: generic_volatile
define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_param_0];
+; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_volatile_param_2];
+; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_volatile_param_3];
+; CHECK-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.volatile.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.volatile.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.volatile.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.volatile.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.volatile.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.volatile.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.volatile.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.volatile.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.volatile.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.volatile.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.volatile.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.volatile.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.volatile.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.volatile.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.volatile.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.volatile.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.volatile.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.volatile.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr %a
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr %b
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr %c
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr %d
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr %c
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr %c
; TODO: volatile, atomic, and volatile atomic memory operations on vector types.
@@ -184,254 +333,358 @@ define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr
; TODO: make this operation consistent with the one for <4 x i8>
; This operation lowers to a "element wise volatile PTX operation".
- ; CHECK: ld.volatile.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.volatile.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <2 x i8> %h.add, ptr %b
; TODO: make this operation consistent with the one for <2 x i8>
; This operation lowers to a "full vector volatile PTX operation".
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <4 x i8> %i.add, ptr %c
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <2 x i16> %j.add, ptr %c
- ; CHECK: ld.volatile.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.volatile.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <4 x i16> %k.add, ptr %d
- ; CHECK: ld.volatile.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.volatile.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <2 x i32> %l.add, ptr %d
- ; CHECK: ld.volatile.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.volatile.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <4 x i32> %m.add, ptr %d
- ; CHECK: ld.volatile.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.volatile.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile <2 x i64> %n.add, ptr %d
- ; CHECK: ld.volatile.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.volatile.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <2 x float> %o.add, ptr %d
- ; CHECK: ld.volatile.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.volatile.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <4 x float> %p.add, ptr %d
- ; CHECK: ld.volatile.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.volatile.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile <2 x double> %q.add, ptr %d
ret void
}
-; CHECK-LABEL: generic_unordered_sys
define void @generic_unordered_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: generic_unordered_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_param_0];
+; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [generic_unordered_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [generic_unordered_sys_param_2];
+; SM60-NEXT: st.volatile.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [generic_unordered_sys_param_3];
+; SM60-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [generic_unordered_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: generic_unordered_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [generic_unordered_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [generic_unordered_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [generic_unordered_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [generic_unordered_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a unordered, align 1
- ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b unordered, align 2
- ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c unordered, align 4
- ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d unordered, align 8
- ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e unordered, align 4
- ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e unordered, align 8
ret void
}
-; CHECK-LABEL: generic_unordered_volatile_sys
define void @generic_unordered_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_unordered_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_unordered_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_unordered_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_unordered_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [generic_unordered_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr %a unordered, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr %b unordered, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr %c unordered, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr %d unordered, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr %e unordered, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr %e unordered, align 8
ret void
}
-; CHECK-LABEL: generic_monotonic_sys
define void @generic_monotonic_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: generic_monotonic_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_param_0];
+; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [generic_monotonic_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [generic_monotonic_sys_param_2];
+; SM60-NEXT: st.volatile.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [generic_monotonic_sys_param_3];
+; SM60-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [generic_monotonic_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: generic_monotonic_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [generic_monotonic_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [generic_monotonic_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [generic_monotonic_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [generic_monotonic_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a monotonic, align 1
- ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b monotonic, align 2
- ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c monotonic, align 4
- ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d monotonic, align 8
- ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e monotonic, align 4
- ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e monotonic, align 8
ret void
}
-; CHECK-LABEL: generic_monotonic_volatile_sys
define void @generic_monotonic_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_monotonic_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_monotonic_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_monotonic_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_monotonic_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [generic_monotonic_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr %a monotonic, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr %b monotonic, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr %c monotonic, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr %d monotonic, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr %e monotonic, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr %e monotonic, align 8
ret void
@@ -439,415 +692,711 @@ define void @generic_monotonic_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr
;; global statespace
-; CHECK-LABEL: global_weak
define void @global_weak(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: global_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [global_weak_param_0];
+; CHECK-NEXT: ld.global.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [global_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [global_weak_param_2];
+; CHECK-NEXT: st.global.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [global_weak_param_3];
+; CHECK-NEXT: ld.global.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.global.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.global.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.global.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.global.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.global.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.global.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.global.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.global.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.global.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.global.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.global.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.global.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.global.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.global.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.global.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.global.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.global.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.global.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.global.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.global.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.global.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.global.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.global.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.global.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.global.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.global.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.global.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.global.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.global.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr addrspace(1) %a
- ; CHECK: ld.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr addrspace(1) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr addrspace(1) %b
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr addrspace(1) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr addrspace(1) %c
- ; CHECK: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr addrspace(1) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr addrspace(1) %d
- ; CHECK: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr addrspace(1) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr addrspace(1) %c
- ; CHECK: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr addrspace(1) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(1) %c
- ; CHECK: ld.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr addrspace(1) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr addrspace(1) %b
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr addrspace(1) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr addrspace(1) %c
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr addrspace(1) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr addrspace(1) %c
- ; CHECK: ld.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr addrspace(1) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr addrspace(1) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr addrspace(1) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr addrspace(1) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr addrspace(1) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr addrspace(1) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr addrspace(1) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr addrspace(1) %d
ret void
}
-; CHECK-LABEL: global_volatile
define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: global_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_param_0];
+; CHECK-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [global_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [global_volatile_param_2];
+; CHECK-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [global_volatile_param_3];
+; CHECK-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.global.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.volatile.global.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.volatile.global.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.global.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.volatile.global.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.global.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.volatile.global.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.volatile.global.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.volatile.global.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.volatile.global.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.volatile.global.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.volatile.global.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.volatile.global.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.volatile.global.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.volatile.global.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.volatile.global.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.volatile.global.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.volatile.global.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.volatile.global.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.volatile.global.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.volatile.global.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.volatile.global.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.volatile.global.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.volatile.global.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.volatile.global.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.global.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr addrspace(1) %a
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr addrspace(1) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr addrspace(1) %b
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr addrspace(1) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr addrspace(1) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr addrspace(1) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr addrspace(1) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr addrspace(1) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.volatile.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile<2 x i8> %h.add, ptr addrspace(1) %b
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr addrspace(1) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile<4 x i8> %i.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr addrspace(1) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile<2 x i16> %j.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr addrspace(1) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.volatile.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile<4 x i16> %k.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr addrspace(1) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.volatile.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile<2 x i32> %l.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr addrspace(1) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.volatile.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile<4 x i32> %m.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr addrspace(1) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.volatile.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile<2 x i64> %n.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr addrspace(1) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.volatile.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile<2 x float> %o.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr addrspace(1) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.volatile.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile<4 x float> %p.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr addrspace(1) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.volatile.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile<2 x double> %q.add, ptr addrspace(1) %d
ret void
}
-; CHECK-LABEL: global_unordered_sys
define void @global_unordered_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_unordered_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_unordered_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_unordered_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_unordered_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_unordered_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_unordered_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_unordered_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_unordered_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_unordered_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_unordered_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e unordered, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
}
-; CHECK-LABEL: global_unordered_volatile_sys
define void @global_unordered_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_unordered_volatile_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_unordered_volatile_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_unordered_volatile_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_unordered_volatile_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_unordered_volatile_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_unordered_volatile_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_param_0];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_unordered_volatile_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_unordered_volatile_sys_param_2];
+; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_unordered_volatile_sys_param_3];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_unordered_volatile_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e unordered, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
}
-; CHECK-LABEL: global_monotonic_sys
define void @global_monotonic_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_monotonic_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_monotonic_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_monotonic_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_monotonic_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_monotonic_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_monotonic_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_monotonic_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_monotonic_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_monotonic_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_monotonic_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
}
-; CHECK-LABEL: global_monotonic_volatile_sys
define void @global_monotonic_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_monotonic_volatile_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_monotonic_volatile_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_monotonic_volatile_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_monotonic_volatile_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_monotonic_volatile_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_monotonic_volatile_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_param_0];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_monotonic_volatile_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_monotonic_volatile_sys_param_2];
+; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_monotonic_volatile_sys_param_3];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_monotonic_volatile_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
@@ -855,391 +1404,643 @@ define void @global_monotonic_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1)
;; shared statespace
-; CHECK-LABEL: shared_weak
define void @shared_weak(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_weak_param_0];
+; CHECK-NEXT: ld.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_weak_param_2];
+; CHECK-NEXT: st.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_weak_param_3];
+; CHECK-NEXT: ld.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.shared.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.shared.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.shared.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.shared.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.shared.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.shared.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.shared.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.shared.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.shared.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.shared.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.shared.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.shared.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.shared.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.shared.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.shared.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.shared.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.shared.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.shared.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.shared.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.shared.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.shared.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.shared.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.shared.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.shared.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.shared.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.shared.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr addrspace(3) %a
- ; CHECK: ld.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr addrspace(3) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr addrspace(3) %b
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr addrspace(3) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr addrspace(3) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr addrspace(3) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr addrspace(3) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr addrspace(3) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr addrspace(3) %b
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr addrspace(3) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr addrspace(3) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr addrspace(3) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr addrspace(3) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr addrspace(3) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr addrspace(3) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr addrspace(3) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr addrspace(3) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr addrspace(3) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr addrspace(3) %d
ret void
}
-; CHECK-LABEL: shared_volatile
define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_param_0];
+; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_volatile_param_2];
+; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_volatile_param_3];
+; CHECK-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.shared.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.volatile.shared.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.volatile.shared.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.volatile.shared.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.volatile.shared.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.volatile.shared.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.volatile.shared.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.volatile.shared.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.volatile.shared.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.volatile.shared.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.volatile.shared.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.volatile.shared.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.volatile.shared.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.volatile.shared.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.volatile.shared.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.volatile.shared.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.volatile.shared.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr addrspace(3) %a
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr addrspace(3) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr addrspace(3) %b
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr addrspace(3) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr addrspace(3) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr addrspace(3) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr addrspace(3) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr addrspace(3) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.volatile.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <2 x i8> %h.add, ptr addrspace(3) %b
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr addrspace(3) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <4 x i8> %i.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr addrspace(3) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <2 x i16> %j.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr addrspace(3) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.volatile.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <4 x i16> %k.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr addrspace(3) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.volatile.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <2 x i32> %l.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr addrspace(3) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.volatile.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <4 x i32> %m.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr addrspace(3) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.volatile.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile <2 x i64> %n.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr addrspace(3) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.volatile.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <2 x float> %o.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr addrspace(3) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.volatile.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <4 x float> %p.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr addrspace(3) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.volatile.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile <2 x double> %q.add, ptr addrspace(3) %d
ret void
}
-; CHECK-LABEL: shared_unordered_sys
define void @shared_unordered_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: shared_unordered_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_param_0];
+; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [shared_unordered_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [shared_unordered_sys_param_2];
+; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [shared_unordered_sys_param_3];
+; SM60-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [shared_unordered_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: shared_unordered_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [shared_unordered_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [shared_unordered_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [shared_unordered_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [shared_unordered_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
- ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b unordered, align 2
- ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c unordered, align 4
- ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d unordered, align 8
- ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e unordered, align 4
- ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e unordered, align 8
ret void
}
-; CHECK-LABEL: shared_unordered_volatile_sys
define void @shared_unordered_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_unordered_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_unordered_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_unordered_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_unordered_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [shared_unordered_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(3) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(3) %b unordered, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(3) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(3) %c unordered, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(3) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(3) %d unordered, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(3) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(3) %e unordered, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(3) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(3) %e unordered, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic_sys
define void @shared_monotonic_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: shared_monotonic_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_param_0];
+; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [shared_monotonic_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [shared_monotonic_sys_param_2];
+; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [shared_monotonic_sys_param_3];
+; SM60-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [shared_monotonic_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: shared_monotonic_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [shared_monotonic_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [shared_monotonic_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [shared_monotonic_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [shared_monotonic_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
- ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
- ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
- ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
- ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
- ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic_volatile_sys
define void @shared_monotonic_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_monotonic_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_monotonic_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_monotonic_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_monotonic_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [shared_monotonic_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(3) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(3) %b monotonic, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(3) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(3) %c monotonic, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(3) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(3) %d monotonic, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(3) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(3) %e monotonic, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(3) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(3) %e monotonic, align 8
ret void
@@ -1247,367 +2048,575 @@ define void @shared_monotonic_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3)
;; local statespace
-; CHECK-LABEL: local_weak
define void @local_weak(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_weak_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_weak_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_weak_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.local.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.local.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.local.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.local.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.local.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.local.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.local.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.local.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.local.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.local.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.local.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.local.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.local.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.local.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.local.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.local.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.local.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.local.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.local.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr addrspace(5) %a
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr addrspace(5) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr addrspace(5) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr addrspace(5) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr addrspace(5) %d
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr addrspace(5) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr addrspace(5) %c
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr addrspace(5) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr addrspace(5) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr addrspace(5) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr addrspace(5) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr addrspace(5) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr addrspace(5) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr addrspace(5) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr addrspace(5) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr addrspace(5) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr addrspace(5) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr addrspace(5) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr addrspace(5) %d
ret void
}
-; CHECK-LABEL: local_volatile
define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_volatile_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_volatile_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.local.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.local.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.local.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.local.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.local.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.local.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.local.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.local.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.local.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.local.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.local.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.local.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.local.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.local.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.local.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.local.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.local.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.local.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.local.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr addrspace(5) %a
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr addrspace(5) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr addrspace(5) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr addrspace(5) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr addrspace(5) %d
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr addrspace(5) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr addrspace(5) %c
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr addrspace(5) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr addrspace(5) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <2 x i8> %h.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr addrspace(5) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <4 x i8> %i.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr addrspace(5) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <2 x i16> %j.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr addrspace(5) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <4 x i16> %k.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr addrspace(5) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <2 x i32> %l.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr addrspace(5) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <4 x i32> %m.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr addrspace(5) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile <2 x i64> %n.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr addrspace(5) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <2 x float> %o.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr addrspace(5) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <4 x float> %p.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr addrspace(5) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile <2 x double> %q.add, ptr addrspace(5) %d
ret void
}
-; CHECK-LABEL: local_unordered_sys
define void @local_unordered_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_unordered_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_unordered_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_unordered_sys_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_unordered_sys_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_unordered_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(5) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(5) %b unordered, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(5) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(5) %c unordered, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(5) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(5) %d unordered, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(5) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(5) %e unordered, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(5) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(5) %e unordered, align 8
ret void
}
-; CHECK-LABEL: local_unordered_volatile_sys
define void @local_unordered_volatile_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_unordered_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_unordered_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_unordered_volatile_sys_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_unordered_volatile_sys_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_unordered_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(5) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(5) %b unordered, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(5) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(5) %c unordered, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(5) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(5) %d unordered, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(5) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(5) %e unordered, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(5) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(5) %e unordered, align 8
ret void
}
-; CHECK-LABEL: local_monotonic_sys
define void @local_monotonic_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_monotonic_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_monotonic_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_monotonic_sys_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_monotonic_sys_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_monotonic_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(5) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(5) %b monotonic, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(5) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(5) %c monotonic, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(5) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(5) %d monotonic, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(5) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(5) %e monotonic, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(5) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(5) %e monotonic, align 8
ret void
}
-; CHECK-LABEL: local_monotonic_volatile
define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_monotonic_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_monotonic_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_monotonic_volatile_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_monotonic_volatile_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_monotonic_volatile_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(5) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(5) %b monotonic, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(5) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(5) %c monotonic, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(5) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(5) %d monotonic, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(5) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(5) %e monotonic, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(5) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(5) %e monotonic, align 8
ret void
diff --git a/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll b/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
index e139d3c9a9df0e..5bd3580f5e95ec 100644
--- a/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
+++ b/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
@@ -1,12 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
; The load is to the high byte of the 2-byte store
@g = global i8 -75
define void @f(i16 %v) {
-; CHECK-LABEL: f
-; CHECK: sth 3, -2(1)
-; CHECK: lbz 3, -2(1)
+; CHECK-LABEL: f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis 4, 2, .LC0@toc@ha
+; CHECK-NEXT: sth 3, -2(1)
+; CHECK-NEXT: ld 4, .LC0@toc@l(4)
+; CHECK-NEXT: lbz 3, -2(1)
+; CHECK-NEXT: stb 3, 0(4)
+; CHECK-NEXT: blr
%p32 = alloca i16
store i16 %v, ptr %p32
%tmp = load i8, ptr %p32
More information about the llvm-commits
mailing list