[llvm] EarlyCSE: create casts on type-mismatch (PR #113339)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 30 07:12:10 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/113339
>From 7800b0b3e42026d2cc00fd33d0bbb16abd9d6232 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 22 Oct 2024 16:44:13 +0100
Subject: [PATCH 1/3] CodeGen/test: regen two tests with UTC (NFC)
---
llvm/test/CodeGen/NVPTX/load-store.ll | 2145 ++++++++++++-----
.../PowerPC/big-endian-store-forward.ll | 12 +-
2 files changed, 1586 insertions(+), 571 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index f922fd92fa244e..8435e016096621 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
@@ -22,149 +23,297 @@
; generic statespace
-; CHECK-LABEL: generic_weak
define void @generic_weak(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_weak_param_0];
+; CHECK-NEXT: ld.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_weak_param_2];
+; CHECK-NEXT: st.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_weak_param_3];
+; CHECK-NEXT: ld.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.f64 %fd1, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.f64 [%rd4], %fd2;
+; CHECK-NEXT: ld.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr %a
- ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr %b
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr %c
- ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr %d
- ; CHECK: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr %c
- ; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr %d
%f.add = fadd double %f.load, 1.
- ; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr %d
; TODO: make the lowering of this weak vector ops consistent with
; the ones of the next tests. This test lowers to a weak PTX
; vector op, but next test lowers to a vector PTX op.
- ; CHECK: ld.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr %b
; TODO: make the lowering of this weak vector ops consistent with
; the ones of the previous test. This test lowers to a weak
; PTX scalar op, but prior test lowers to a vector PTX op.
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr %c
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr %c
- ; CHECK: ld.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr %d
- ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr %d
- ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr %d
- ; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr %d
- ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr %d
- ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr %d
- ; CHECK: ld.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr %d
ret void
}
-; CHECK-LABEL: generic_volatile
define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_param_0];
+; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_volatile_param_2];
+; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_volatile_param_3];
+; CHECK-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.volatile.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.volatile.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.volatile.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.volatile.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.volatile.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.volatile.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.volatile.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.volatile.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.volatile.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.volatile.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.volatile.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.volatile.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.volatile.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.volatile.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.volatile.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.volatile.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.volatile.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.volatile.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr %a
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr %b
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr %c
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr %d
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr %c
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr %c
; TODO: volatile, atomic, and volatile atomic memory operations on vector types.
@@ -184,254 +333,358 @@ define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr
; TODO: make this operation consistent with the one for <4 x i8>
; This operation lowers to a "element wise volatile PTX operation".
- ; CHECK: ld.volatile.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.volatile.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <2 x i8> %h.add, ptr %b
; TODO: make this operation consistent with the one for <2 x i8>
; This operation lowers to a "full vector volatile PTX operation".
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <4 x i8> %i.add, ptr %c
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <2 x i16> %j.add, ptr %c
- ; CHECK: ld.volatile.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.volatile.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <4 x i16> %k.add, ptr %d
- ; CHECK: ld.volatile.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.volatile.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <2 x i32> %l.add, ptr %d
- ; CHECK: ld.volatile.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.volatile.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <4 x i32> %m.add, ptr %d
- ; CHECK: ld.volatile.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.volatile.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile <2 x i64> %n.add, ptr %d
- ; CHECK: ld.volatile.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.volatile.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <2 x float> %o.add, ptr %d
- ; CHECK: ld.volatile.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.volatile.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <4 x float> %p.add, ptr %d
- ; CHECK: ld.volatile.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.volatile.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile <2 x double> %q.add, ptr %d
ret void
}
-; CHECK-LABEL: generic_unordered_sys
define void @generic_unordered_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: generic_unordered_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_param_0];
+; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [generic_unordered_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [generic_unordered_sys_param_2];
+; SM60-NEXT: st.volatile.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [generic_unordered_sys_param_3];
+; SM60-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [generic_unordered_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: generic_unordered_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [generic_unordered_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [generic_unordered_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [generic_unordered_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [generic_unordered_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [generic_unordered_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a unordered, align 1
- ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b unordered, align 2
- ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c unordered, align 4
- ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d unordered, align 8
- ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e unordered, align 4
- ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e unordered, align 8
ret void
}
-; CHECK-LABEL: generic_unordered_volatile_sys
define void @generic_unordered_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_unordered_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_unordered_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_unordered_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_unordered_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_unordered_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [generic_unordered_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr %a unordered, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr %b unordered, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr %c unordered, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr %d unordered, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr %e unordered, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr %e unordered, align 8
ret void
}
-; CHECK-LABEL: generic_monotonic_sys
define void @generic_monotonic_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: generic_monotonic_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_param_0];
+; SM60-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [generic_monotonic_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [generic_monotonic_sys_param_2];
+; SM60-NEXT: st.volatile.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [generic_monotonic_sys_param_3];
+; SM60-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [generic_monotonic_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: generic_monotonic_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [generic_monotonic_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [generic_monotonic_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [generic_monotonic_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [generic_monotonic_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [generic_monotonic_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a monotonic, align 1
- ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b monotonic, align 2
- ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c monotonic, align 4
- ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d monotonic, align 8
- ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e monotonic, align 4
- ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e monotonic, align 8
ret void
}
-; CHECK-LABEL: generic_monotonic_volatile_sys
define void @generic_monotonic_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: generic_monotonic_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [generic_monotonic_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [generic_monotonic_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [generic_monotonic_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [generic_monotonic_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [generic_monotonic_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr %a monotonic, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr %b monotonic, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr %c monotonic, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr %d monotonic, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr %e monotonic, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr %e monotonic, align 8
ret void
@@ -439,415 +692,711 @@ define void @generic_monotonic_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr
;; global statespace
-; CHECK-LABEL: global_weak
define void @global_weak(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: global_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [global_weak_param_0];
+; CHECK-NEXT: ld.global.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [global_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [global_weak_param_2];
+; CHECK-NEXT: st.global.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [global_weak_param_3];
+; CHECK-NEXT: ld.global.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.global.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.global.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.global.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.global.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.global.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.global.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.global.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.global.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.global.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.global.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.global.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.global.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.global.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.global.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.global.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.global.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.global.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.global.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.global.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.global.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.global.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.global.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.global.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.global.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.global.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.global.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.global.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.global.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.global.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr addrspace(1) %a
- ; CHECK: ld.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr addrspace(1) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr addrspace(1) %b
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr addrspace(1) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr addrspace(1) %c
- ; CHECK: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr addrspace(1) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr addrspace(1) %d
- ; CHECK: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr addrspace(1) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr addrspace(1) %c
- ; CHECK: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr addrspace(1) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(1) %c
- ; CHECK: ld.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr addrspace(1) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr addrspace(1) %b
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr addrspace(1) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr addrspace(1) %c
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr addrspace(1) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr addrspace(1) %c
- ; CHECK: ld.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr addrspace(1) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr addrspace(1) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr addrspace(1) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr addrspace(1) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr addrspace(1) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr addrspace(1) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr addrspace(1) %d
- ; CHECK: ld.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr addrspace(1) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr addrspace(1) %d
ret void
}
-; CHECK-LABEL: global_volatile
define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: global_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_param_0];
+; CHECK-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [global_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [global_volatile_param_2];
+; CHECK-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [global_volatile_param_3];
+; CHECK-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.global.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.volatile.global.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.volatile.global.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.global.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.volatile.global.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.global.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.volatile.global.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.volatile.global.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.volatile.global.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.volatile.global.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.volatile.global.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.volatile.global.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.volatile.global.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.volatile.global.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.volatile.global.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.volatile.global.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.volatile.global.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.volatile.global.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.volatile.global.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.volatile.global.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.volatile.global.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.volatile.global.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.volatile.global.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.volatile.global.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.volatile.global.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.global.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr addrspace(1) %a
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr addrspace(1) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr addrspace(1) %b
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr addrspace(1) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr addrspace(1) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr addrspace(1) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr addrspace(1) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr addrspace(1) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.volatile.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile<2 x i8> %h.add, ptr addrspace(1) %b
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr addrspace(1) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile<4 x i8> %i.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr addrspace(1) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile<2 x i16> %j.add, ptr addrspace(1) %c
- ; CHECK: ld.volatile.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr addrspace(1) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.volatile.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile<4 x i16> %k.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr addrspace(1) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.volatile.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile<2 x i32> %l.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr addrspace(1) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.volatile.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile<4 x i32> %m.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr addrspace(1) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.volatile.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile<2 x i64> %n.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr addrspace(1) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.volatile.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile<2 x float> %o.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr addrspace(1) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.volatile.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile<4 x float> %p.add, ptr addrspace(1) %d
- ; CHECK: ld.volatile.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr addrspace(1) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.volatile.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile<2 x double> %q.add, ptr addrspace(1) %d
ret void
}
-; CHECK-LABEL: global_unordered_sys
define void @global_unordered_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_unordered_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_unordered_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_unordered_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_unordered_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_unordered_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_unordered_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_unordered_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_unordered_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_unordered_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_unordered_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e unordered, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
}
-; CHECK-LABEL: global_unordered_volatile_sys
define void @global_unordered_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_unordered_volatile_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_unordered_volatile_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_unordered_volatile_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_unordered_volatile_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_unordered_volatile_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_unordered_volatile_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_unordered_volatile_sys_param_0];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_unordered_volatile_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_unordered_volatile_sys_param_2];
+; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_unordered_volatile_sys_param_3];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_unordered_volatile_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e unordered, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
}
-; CHECK-LABEL: global_monotonic_sys
define void @global_monotonic_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_monotonic_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_monotonic_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_monotonic_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_monotonic_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_monotonic_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_monotonic_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_monotonic_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_monotonic_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_monotonic_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_monotonic_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
}
-; CHECK-LABEL: global_monotonic_volatile_sys
define void @global_monotonic_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: global_monotonic_volatile_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_param_0];
+; SM60-NEXT: ld.volatile.global.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [global_monotonic_volatile_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [global_monotonic_volatile_sys_param_2];
+; SM60-NEXT: st.volatile.global.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [global_monotonic_volatile_sys_param_3];
+; SM60-NEXT: ld.volatile.global.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [global_monotonic_volatile_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.global.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.global.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.global.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.global.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.global.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.global.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.global.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.global.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.global.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: global_monotonic_volatile_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [global_monotonic_volatile_sys_param_0];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [global_monotonic_volatile_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [global_monotonic_volatile_sys_param_2];
+; SM70-NEXT: st.mmio.relaxed.sys.global.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [global_monotonic_volatile_sys_param_3];
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [global_monotonic_volatile_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u32 [%rd3], %r2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.mmio.relaxed.sys.global.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f32 [%rd5], %f2;
+; SM70-NEXT: ld.mmio.relaxed.sys.global.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.mmio.relaxed.sys.global.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
@@ -855,391 +1404,643 @@ define void @global_monotonic_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1)
;; shared statespace
-; CHECK-LABEL: shared_weak
define void @shared_weak(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_weak_param_0];
+; CHECK-NEXT: ld.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_weak_param_2];
+; CHECK-NEXT: st.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_weak_param_3];
+; CHECK-NEXT: ld.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.shared.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.shared.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.shared.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.shared.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.shared.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.shared.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.shared.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.shared.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.shared.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.shared.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.shared.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.shared.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.shared.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.shared.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.shared.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.shared.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.shared.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.shared.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.shared.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.shared.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.shared.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.shared.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.shared.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.shared.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.shared.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.shared.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr addrspace(3) %a
- ; CHECK: ld.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr addrspace(3) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr addrspace(3) %b
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr addrspace(3) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr addrspace(3) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr addrspace(3) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr addrspace(3) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr addrspace(3) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr addrspace(3) %b
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr addrspace(3) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr addrspace(3) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr addrspace(3) %c
- ; CHECK: ld.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr addrspace(3) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr addrspace(3) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr addrspace(3) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr addrspace(3) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr addrspace(3) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr addrspace(3) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr addrspace(3) %d
- ; CHECK: ld.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr addrspace(3) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr addrspace(3) %d
ret void
}
-; CHECK-LABEL: shared_volatile
define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_param_0];
+; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_volatile_param_2];
+; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_volatile_param_3];
+; CHECK-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.shared.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.volatile.shared.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.volatile.shared.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.volatile.shared.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.volatile.shared.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.volatile.shared.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.volatile.shared.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.volatile.shared.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.volatile.shared.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.volatile.shared.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.volatile.shared.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.volatile.shared.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.volatile.shared.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.volatile.shared.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.volatile.shared.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.volatile.shared.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.volatile.shared.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr addrspace(3) %a
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr addrspace(3) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr addrspace(3) %b
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr addrspace(3) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr addrspace(3) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr addrspace(3) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr addrspace(3) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr addrspace(3) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.volatile.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <2 x i8> %h.add, ptr addrspace(3) %b
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr addrspace(3) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <4 x i8> %i.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr addrspace(3) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <2 x i16> %j.add, ptr addrspace(3) %c
- ; CHECK: ld.volatile.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr addrspace(3) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.volatile.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <4 x i16> %k.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr addrspace(3) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.volatile.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <2 x i32> %l.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr addrspace(3) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.volatile.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <4 x i32> %m.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr addrspace(3) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.volatile.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile <2 x i64> %n.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr addrspace(3) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.volatile.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <2 x float> %o.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr addrspace(3) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.volatile.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <4 x float> %p.add, ptr addrspace(3) %d
- ; CHECK: ld.volatile.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr addrspace(3) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.volatile.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile <2 x double> %q.add, ptr addrspace(3) %d
ret void
}
-; CHECK-LABEL: shared_unordered_sys
define void @shared_unordered_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: shared_unordered_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_param_0];
+; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [shared_unordered_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [shared_unordered_sys_param_2];
+; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [shared_unordered_sys_param_3];
+; SM60-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [shared_unordered_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: shared_unordered_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [shared_unordered_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [shared_unordered_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [shared_unordered_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [shared_unordered_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [shared_unordered_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
- ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b unordered, align 2
- ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c unordered, align 4
- ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d unordered, align 8
- ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e unordered, align 4
- ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e unordered, align 8
ret void
}
-; CHECK-LABEL: shared_unordered_volatile_sys
define void @shared_unordered_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_unordered_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_unordered_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_unordered_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_unordered_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_unordered_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [shared_unordered_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(3) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(3) %b unordered, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(3) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(3) %c unordered, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(3) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(3) %d unordered, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(3) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(3) %e unordered, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(3) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(3) %e unordered, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic_sys
define void @shared_monotonic_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; SM60-LABEL: shared_monotonic_sys(
+; SM60: {
+; SM60-NEXT: .reg .b16 %rs<5>;
+; SM60-NEXT: .reg .b32 %r<3>;
+; SM60-NEXT: .reg .f32 %f<3>;
+; SM60-NEXT: .reg .b64 %rd<8>;
+; SM60-NEXT: .reg .f64 %fd<3>;
+; SM60-EMPTY:
+; SM60-NEXT: // %bb.0:
+; SM60-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_param_0];
+; SM60-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; SM60-NEXT: ld.param.u64 %rd2, [shared_monotonic_sys_param_1];
+; SM60-NEXT: add.s16 %rs2, %rs1, 1;
+; SM60-NEXT: ld.param.u64 %rd3, [shared_monotonic_sys_param_2];
+; SM60-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; SM60-NEXT: ld.param.u64 %rd4, [shared_monotonic_sys_param_3];
+; SM60-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; SM60-NEXT: ld.param.u64 %rd5, [shared_monotonic_sys_param_4];
+; SM60-NEXT: add.s16 %rs4, %rs3, 1;
+; SM60-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; SM60-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; SM60-NEXT: add.s32 %r2, %r1, 1;
+; SM60-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; SM60-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; SM60-NEXT: add.s64 %rd7, %rd6, 1;
+; SM60-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; SM60-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; SM60-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM60-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; SM60-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; SM60-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM60-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; SM60-NEXT: ret;
+;
+; SM70-LABEL: shared_monotonic_sys(
+; SM70: {
+; SM70-NEXT: .reg .b16 %rs<5>;
+; SM70-NEXT: .reg .b32 %r<3>;
+; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b64 %rd<8>;
+; SM70-NEXT: .reg .f64 %fd<3>;
+; SM70-EMPTY:
+; SM70-NEXT: // %bb.0:
+; SM70-NEXT: ld.param.u64 %rd1, [shared_monotonic_sys_param_0];
+; SM70-NEXT: ld.relaxed.sys.shared.u8 %rs1, [%rd1];
+; SM70-NEXT: ld.param.u64 %rd2, [shared_monotonic_sys_param_1];
+; SM70-NEXT: add.s16 %rs2, %rs1, 1;
+; SM70-NEXT: ld.param.u64 %rd3, [shared_monotonic_sys_param_2];
+; SM70-NEXT: st.relaxed.sys.shared.u8 [%rd1], %rs2;
+; SM70-NEXT: ld.param.u64 %rd4, [shared_monotonic_sys_param_3];
+; SM70-NEXT: ld.relaxed.sys.shared.u16 %rs3, [%rd2];
+; SM70-NEXT: ld.param.u64 %rd5, [shared_monotonic_sys_param_4];
+; SM70-NEXT: add.s16 %rs4, %rs3, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u16 [%rd2], %rs4;
+; SM70-NEXT: ld.relaxed.sys.shared.u32 %r1, [%rd3];
+; SM70-NEXT: add.s32 %r2, %r1, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u32 [%rd3], %r2;
+; SM70-NEXT: ld.relaxed.sys.shared.u64 %rd6, [%rd4];
+; SM70-NEXT: add.s64 %rd7, %rd6, 1;
+; SM70-NEXT: st.relaxed.sys.shared.u64 [%rd4], %rd7;
+; SM70-NEXT: ld.relaxed.sys.shared.f32 %f1, [%rd5];
+; SM70-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM70-NEXT: st.relaxed.sys.shared.f32 [%rd5], %f2;
+; SM70-NEXT: ld.relaxed.sys.shared.f64 %fd1, [%rd5];
+; SM70-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; SM70-NEXT: st.relaxed.sys.shared.f64 [%rd5], %fd2;
+; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
- ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
- ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
- ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
- ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
- ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic_volatile_sys
define void @shared_monotonic_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: shared_monotonic_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [shared_monotonic_volatile_sys_param_0];
+; CHECK-NEXT: ld.volatile.shared.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [shared_monotonic_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [shared_monotonic_volatile_sys_param_2];
+; CHECK-NEXT: st.volatile.shared.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [shared_monotonic_volatile_sys_param_3];
+; CHECK-NEXT: ld.volatile.shared.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [shared_monotonic_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.volatile.shared.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.volatile.shared.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.volatile.shared.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.volatile.shared.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.volatile.shared.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.volatile.shared.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.volatile.shared.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.volatile.shared.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(3) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(3) %b monotonic, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(3) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(3) %c monotonic, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(3) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(3) %d monotonic, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(3) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(3) %e monotonic, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(3) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(3) %e monotonic, align 8
ret void
@@ -1247,367 +2048,575 @@ define void @shared_monotonic_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3)
;; local statespace
-; CHECK-LABEL: local_weak
define void @local_weak(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_weak(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_weak_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_weak_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_weak_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_weak_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.local.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.local.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.local.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.local.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.local.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.local.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.local.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.local.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.local.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.local.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.local.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.local.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.local.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.local.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.local.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.local.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.local.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.local.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.local.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i8 %a.add, ptr addrspace(5) %a
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load i16, ptr addrspace(5) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store i16 %b.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load i32, ptr addrspace(5) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store i32 %c.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load i64, ptr addrspace(5) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store i64 %d.add, ptr addrspace(5) %d
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load float, ptr addrspace(5) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store float %e.add, ptr addrspace(5) %c
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load double, ptr addrspace(5) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load <2 x i8>, ptr addrspace(5) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <2 x i8> %h.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load <4 x i8>, ptr addrspace(5) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <4 x i8> %i.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load <2 x i16>, ptr addrspace(5) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store <2 x i16> %j.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load <4 x i16>, ptr addrspace(5) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store <4 x i16> %k.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load <2 x i32>, ptr addrspace(5) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store <2 x i32> %l.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load <4 x i32>, ptr addrspace(5) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store <4 x i32> %m.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load <2 x i64>, ptr addrspace(5) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store <2 x i64> %n.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load <2 x float>, ptr addrspace(5) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store <2 x float> %o.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load <4 x float>, ptr addrspace(5) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store <4 x float> %p.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load <2 x double>, ptr addrspace(5) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store <2 x double> %q.add, ptr addrspace(5) %d
ret void
}
-; CHECK-LABEL: local_volatile
define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<29>;
+; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .f32 %f<15>;
+; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .f64 %fd<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_volatile_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_volatile_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd5, [%rd4];
+; CHECK-NEXT: add.s64 %rd6, %rd5, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd6;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd3];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd3], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd3];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd3], %fd2;
+; CHECK-NEXT: ld.local.v2.u8 {%rs5, %rs6}, [%rd2];
+; CHECK-NEXT: add.s16 %rs7, %rs6, 1;
+; CHECK-NEXT: add.s16 %rs8, %rs5, 1;
+; CHECK-NEXT: st.local.v2.u8 [%rd2], {%rs8, %rs7};
+; CHECK-NEXT: ld.local.u32 %r3, [%rd3];
+; CHECK-NEXT: bfe.u32 %r4, %r3, 0, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs9, %r4;
+; CHECK-NEXT: add.s16 %rs10, %rs9, 1;
+; CHECK-NEXT: cvt.u32.u16 %r5, %rs10;
+; CHECK-NEXT: bfe.u32 %r6, %r3, 8, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs11, %r6;
+; CHECK-NEXT: add.s16 %rs12, %rs11, 1;
+; CHECK-NEXT: cvt.u32.u16 %r7, %rs12;
+; CHECK-NEXT: bfi.b32 %r8, %r7, %r5, 8, 8;
+; CHECK-NEXT: bfe.u32 %r9, %r3, 16, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs13, %r9;
+; CHECK-NEXT: add.s16 %rs14, %rs13, 1;
+; CHECK-NEXT: cvt.u32.u16 %r10, %rs14;
+; CHECK-NEXT: bfi.b32 %r11, %r10, %r8, 16, 8;
+; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
+; CHECK-NEXT: cvt.u16.u32 %rs15, %r12;
+; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
+; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
+; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
+; CHECK-NEXT: st.local.u32 [%rd3], %r14;
+; CHECK-NEXT: ld.local.u32 %r15, [%rd3];
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
+; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
+; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
+; CHECK-NEXT: st.local.u32 [%rd3], %r16;
+; CHECK-NEXT: ld.local.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
+; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
+; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
+; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
+; CHECK-NEXT: st.local.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
+; CHECK-NEXT: ld.local.v2.u32 {%r17, %r18}, [%rd4];
+; CHECK-NEXT: add.s32 %r19, %r18, 1;
+; CHECK-NEXT: add.s32 %r20, %r17, 1;
+; CHECK-NEXT: st.local.v2.u32 [%rd4], {%r20, %r19};
+; CHECK-NEXT: ld.local.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
+; CHECK-NEXT: add.s32 %r25, %r24, 1;
+; CHECK-NEXT: add.s32 %r26, %r23, 1;
+; CHECK-NEXT: add.s32 %r27, %r22, 1;
+; CHECK-NEXT: add.s32 %r28, %r21, 1;
+; CHECK-NEXT: st.local.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
+; CHECK-NEXT: ld.local.v2.u64 {%rd7, %rd8}, [%rd4];
+; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
+; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
+; CHECK-NEXT: st.local.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: ld.local.v2.f32 {%f3, %f4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
+; CHECK-NEXT: st.local.v2.f32 [%rd4], {%f6, %f5};
+; CHECK-NEXT: ld.local.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
+; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
+; CHECK-NEXT: st.local.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
+; CHECK-NEXT: ld.local.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
+; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.v2.f64 [%rd4], {%fd6, %fd5};
+; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i8 %a.add, ptr addrspace(5) %a
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load volatile i16, ptr addrspace(5) %b
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store volatile i16 %b.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load volatile i32, ptr addrspace(5) %c
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile i32 %c.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load volatile i64, ptr addrspace(5) %d
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store volatile i64 %d.add, ptr addrspace(5) %d
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load volatile float, ptr addrspace(5) %c
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store volatile float %e.add, ptr addrspace(5) %c
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load volatile double, ptr addrspace(5) %c
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%h.load = load volatile <2 x i8>, ptr addrspace(5) %b
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
- ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <2 x i8> %h.add, ptr addrspace(5) %b
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%i.load = load volatile <4 x i8>, ptr addrspace(5) %c
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <4 x i8> %i.add, ptr addrspace(5) %c
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%j.load = load volatile <2 x i16>, ptr addrspace(5) %c
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store volatile <2 x i16> %j.add, ptr addrspace(5) %c
- ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
%k.load = load volatile <4 x i16>, ptr addrspace(5) %d
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
- ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
store volatile <4 x i16> %k.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%l.load = load volatile <2 x i32>, ptr addrspace(5) %d
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
- ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <2 x i32> %l.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
%m.load = load volatile <4 x i32>, ptr addrspace(5) %d
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
- ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
store volatile <4 x i32> %m.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%n.load = load volatile <2 x i64>, ptr addrspace(5) %d
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
- ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
store volatile <2 x i64> %n.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%o.load = load volatile <2 x float>, ptr addrspace(5) %d
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
- ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <2 x float> %o.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
%p.load = load volatile <4 x float>, ptr addrspace(5) %d
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
- ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
store volatile <4 x float> %p.add, ptr addrspace(5) %d
- ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
%q.load = load volatile <2 x double>, ptr addrspace(5) %d
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
- ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
store volatile <2 x double> %q.add, ptr addrspace(5) %d
ret void
}
-; CHECK-LABEL: local_unordered_sys
define void @local_unordered_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_unordered_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_sys_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_unordered_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_unordered_sys_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_unordered_sys_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_unordered_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(5) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(5) %b unordered, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(5) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(5) %c unordered, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(5) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(5) %d unordered, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(5) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(5) %e unordered, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(5) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(5) %e unordered, align 8
ret void
}
-; CHECK-LABEL: local_unordered_volatile_sys
define void @local_unordered_volatile_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_unordered_volatile_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_unordered_volatile_sys_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_unordered_volatile_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_unordered_volatile_sys_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_unordered_volatile_sys_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_unordered_volatile_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(5) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(5) %b unordered, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(5) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(5) %c unordered, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(5) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(5) %d unordered, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(5) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(5) %e unordered, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(5) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(5) %e unordered, align 8
ret void
}
-; CHECK-LABEL: local_monotonic_sys
define void @local_monotonic_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_monotonic_sys(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_sys_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_monotonic_sys_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_monotonic_sys_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_monotonic_sys_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_monotonic_sys_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(5) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(5) %b monotonic, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(5) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(5) %c monotonic, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(5) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(5) %d monotonic, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(5) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(5) %e monotonic, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(5) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(5) %e monotonic, align 8
ret void
}
-; CHECK-LABEL: local_monotonic_volatile
define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK-LABEL: local_monotonic_volatile(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [local_monotonic_volatile_param_0];
+; CHECK-NEXT: ld.local.u8 %rs1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [local_monotonic_volatile_param_1];
+; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
+; CHECK-NEXT: ld.param.u64 %rd3, [local_monotonic_volatile_param_2];
+; CHECK-NEXT: st.local.u8 [%rd1], %rs2;
+; CHECK-NEXT: ld.param.u64 %rd4, [local_monotonic_volatile_param_3];
+; CHECK-NEXT: ld.local.u16 %rs3, [%rd2];
+; CHECK-NEXT: ld.param.u64 %rd5, [local_monotonic_volatile_param_4];
+; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
+; CHECK-NEXT: st.local.u16 [%rd2], %rs4;
+; CHECK-NEXT: ld.local.u32 %r1, [%rd3];
+; CHECK-NEXT: add.s32 %r2, %r1, 1;
+; CHECK-NEXT: st.local.u32 [%rd3], %r2;
+; CHECK-NEXT: ld.local.u64 %rd6, [%rd4];
+; CHECK-NEXT: add.s64 %rd7, %rd6, 1;
+; CHECK-NEXT: st.local.u64 [%rd4], %rd7;
+; CHECK-NEXT: ld.local.f32 %f1, [%rd5];
+; CHECK-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; CHECK-NEXT: st.local.f32 [%rd5], %f2;
+; CHECK-NEXT: ld.local.f64 %fd1, [%rd5];
+; CHECK-NEXT: add.rn.f64 %fd2, %fd1, 0d3FF0000000000000;
+; CHECK-NEXT: st.local.f64 [%rd5], %fd2;
+; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(5) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(5) %b monotonic, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(5) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(5) %c monotonic, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(5) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(5) %d monotonic, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(5) %e monotonic, align 4
%e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(5) %e monotonic, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(5) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(5) %e monotonic, align 8
ret void
diff --git a/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll b/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
index e139d3c9a9df0e..5bd3580f5e95ec 100644
--- a/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
+++ b/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
@@ -1,12 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
; The load is to the high byte of the 2-byte store
@g = global i8 -75
define void @f(i16 %v) {
-; CHECK-LABEL: f
-; CHECK: sth 3, -2(1)
-; CHECK: lbz 3, -2(1)
+; CHECK-LABEL: f:
+; CHECK: # %bb.0:
+; CHECK-NEXT:    addis 4, 2, .LC0@toc@ha
+; CHECK-NEXT: sth 3, -2(1)
+; CHECK-NEXT:    ld 4, .LC0@toc@l(4)
+; CHECK-NEXT: lbz 3, -2(1)
+; CHECK-NEXT: stb 3, 0(4)
+; CHECK-NEXT: blr
%p32 = alloca i16
store i16 %v, ptr %p32
%tmp = load i8, ptr %p32
>From da9d77053ab64798635c63dedb207356d54e8e41 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 22 Oct 2024 15:07:17 +0100
Subject: [PATCH 2/3] EarlyCSE: create casts on type-mismatch
getOrCreateResult suffers from the deficiency that it doesn't attempt to
create casts when types mismatch. Fix this deficiency, making EarlyCSE
more powerful.
---
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 62 +-
.../AMDGPU/GlobalISel/irtranslator-call.ll | 4 +-
.../branch-folding-implicit-def-subreg.ll | 640 +++++++++---------
llvm/test/CodeGen/NVPTX/load-store.ll | 272 +++++---
.../PowerPC/big-endian-store-forward.ll | 1 -
llvm/test/CodeGen/PowerPC/p10-spill-creq.ll | 62 +-
.../Transforms/EarlyCSE/invariant.start.ll | 30 +-
llvm/test/Transforms/EarlyCSE/opaque-ptr.ll | 16 +-
8 files changed, 590 insertions(+), 497 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index a1dbb4e1d5e75f..9714611cda8b0f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -964,32 +965,45 @@ class EarlyCSE {
bool overridingStores(const ParseMemoryInst &Earlier,
const ParseMemoryInst &Later);
- Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
- // TODO: We could insert relevant casts on type mismatch here.
- if (auto *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType() == ExpectedType ? LI : nullptr;
- if (auto *SI = dyn_cast<StoreInst>(Inst)) {
- Value *V = SI->getValueOperand();
- return V->getType() == ExpectedType ? V : nullptr;
+ Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
+ if (!isa<IntrinsicInst, LoadInst, StoreInst>(Inst))
+ llvm_unreachable("Instruction not supported");
+
+ // The load or the store's first operand.
+ Value *V;
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ V = II;
+ break;
+ case Intrinsic::masked_store:
+ V = II->getOperand(0);
+ break;
+ default:
+ return nullptr;
+ }
+ else
+ return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
+ } else {
+ V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
}
- assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
- auto *II = cast<IntrinsicInst>(Inst);
- if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
- return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
- return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
- }
- Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
- Type *ExpectedType) const {
- // TODO: We could insert relevant casts on type mismatch here.
- switch (II->getIntrinsicID()) {
- case Intrinsic::masked_load:
- return II->getType() == ExpectedType ? II : nullptr;
- case Intrinsic::masked_store: {
- Value *V = II->getOperand(0);
- return V->getType() == ExpectedType ? V : nullptr;
- }
- }
+ Type *ActualType = V->getType();
+ BasicBlock *TheBB = Inst->getParent();
+
+ // First handle the case when no cast is required.
+ if (ActualType == ExpectedType)
+ return V;
+
+ // Try to create BitCast, SExt, or Trunc.
+ IRBuilder<> Builder(TheBB, std::next(Inst->getIterator()));
+ if (CastInst::castIsValid(Instruction::BitCast, V, ExpectedType))
+ return Builder.CreateBitCast(V, ExpectedType);
+ if (CastInst::castIsValid(Instruction::SExt, V, ExpectedType))
+ return Builder.CreateSExt(V, ExpectedType);
+ if (CastInst::castIsValid(Instruction::Trunc, V, ExpectedType))
+ return Builder.CreateTrunc(V, ExpectedType);
return nullptr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index c3694158e7b971..6fe26286b74c22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -3683,7 +3683,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD2]](s8)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -3720,7 +3720,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
- ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
+ ; CHECK-NEXT: G_STORE [[SEXT]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 055e9850de3d68..265204726da124 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -4,7 +4,7 @@
define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 %arg2, i1 %arg3, i1 %arg4, i1 %arg5, i1 %arg6, ptr addrspace(3) %arg7, ptr addrspace(3) %arg8, ptr addrspace(3) %arg9, ptr addrspace(3) %arg10) {
; GFX90A-LABEL: name: f1
; GFX90A: bb.0.bb:
- ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.1(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr15, $sgpr10_sgpr11
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0
@@ -30,39 +30,25 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr2 = DS_READ_B32_gfx9 renamable $vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3)
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr24_sgpr25, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.56, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1.bb103:
- ; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.2(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.2:
- ; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: successors: %bb.60(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
- ; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
+ ; GFX90A-NEXT: S_BRANCH %bb.60
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.3.Flow17:
- ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.58(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.58, implicit $vcc
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.4.bb15:
- ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
+ ; GFX90A-NEXT: bb.2.bb15:
+ ; GFX90A-NEXT: successors: %bb.33(0x40000000), %bb.3(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec
@@ -73,10 +59,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr40, renamable $vcc = V_ADD_CO_U32_e64 $vgpr46, killed $vgpr0, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr41, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr47, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.35, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.33, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.5:
- ; GFX90A-NEXT: successors: %bb.6(0x80000000)
+ ; GFX90A-NEXT: bb.3:
+ ; GFX90A-NEXT: successors: %bb.4(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
@@ -103,96 +89,96 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.6.Flow20:
- ; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.4.Flow20:
+ ; GFX90A-NEXT: successors: %bb.5(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr19 = COPY renamable $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr18 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr21 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr20 = COPY $sgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr52 = COPY $sgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr25 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr23 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr22 = COPY $sgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr25 = COPY $sgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr24 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.7.Flow19:
- ; GFX90A-NEXT: successors: %bb.63(0x40000000), %bb.8(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.5.Flow19:
+ ; GFX90A-NEXT: successors: %bb.65(0x40000000), %bb.6(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr25, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $sgpr28_sgpr29, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.63, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.65, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.8.Flow32:
- ; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
+ ; GFX90A-NEXT: bb.6.Flow32:
+ ; GFX90A-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def $scc
; GFX90A-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr8_sgpr9 = S_XOR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.9.bb89:
- ; GFX90A-NEXT: successors: %bb.10(0x80000000)
+ ; GFX90A-NEXT: bb.7.bb89:
+ ; GFX90A-NEXT: successors: %bb.8(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.10.Flow33:
- ; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
+ ; GFX90A-NEXT: bb.8.Flow33:
+ ; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def $scc
; GFX90A-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 $sgpr56_sgpr57, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr8_sgpr9 = S_XOR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.11.bb84:
- ; GFX90A-NEXT: successors: %bb.12(0x80000000)
+ ; GFX90A-NEXT: bb.9.bb84:
+ ; GFX90A-NEXT: successors: %bb.10(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.12.Flow34:
- ; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
+ ; GFX90A-NEXT: bb.10.Flow34:
+ ; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def $scc
; GFX90A-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 $sgpr52_sgpr53, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr8_sgpr9 = S_XOR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.13.bb79:
- ; GFX90A-NEXT: successors: %bb.14(0x80000000)
+ ; GFX90A-NEXT: bb.11.bb79:
+ ; GFX90A-NEXT: successors: %bb.12(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.14.Flow35:
- ; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000)
+ ; GFX90A-NEXT: bb.12.Flow35:
+ ; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def $scc
; GFX90A-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 $sgpr16_sgpr17, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.16, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.15.bb72:
- ; GFX90A-NEXT: successors: %bb.16(0x80000000)
+ ; GFX90A-NEXT: bb.13.bb72:
+ ; GFX90A-NEXT: successors: %bb.14(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr6, 48, implicit-def $scc
@@ -202,162 +188,162 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit undef $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.16.Flow36:
- ; GFX90A-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000)
+ ; GFX90A-NEXT: bb.14.Flow36:
+ ; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000)
; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr50_sgpr51, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.18, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.16, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.17.bb67:
- ; GFX90A-NEXT: successors: %bb.18(0x80000000)
+ ; GFX90A-NEXT: bb.15.bb67:
+ ; GFX90A-NEXT: successors: %bb.16(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.18.Flow37:
- ; GFX90A-NEXT: successors: %bb.19(0x40000000), %bb.20(0x40000000)
+ ; GFX90A-NEXT: bb.16.Flow37:
+ ; GFX90A-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr48_sgpr49, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.20, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.18, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.19.bb62:
- ; GFX90A-NEXT: successors: %bb.20(0x80000000)
+ ; GFX90A-NEXT: bb.17.bb62:
+ ; GFX90A-NEXT: successors: %bb.18(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.20.Flow38:
- ; GFX90A-NEXT: successors: %bb.21(0x40000000), %bb.22(0x40000000)
+ ; GFX90A-NEXT: bb.18.Flow38:
+ ; GFX90A-NEXT: successors: %bb.19(0x40000000), %bb.20(0x40000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.22, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.20, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.21.bb54:
- ; GFX90A-NEXT: successors: %bb.22(0x80000000)
+ ; GFX90A-NEXT: bb.19.bb54:
+ ; GFX90A-NEXT: successors: %bb.20(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.22.Flow39:
- ; GFX90A-NEXT: successors: %bb.23(0x40000000), %bb.24(0x40000000)
+ ; GFX90A-NEXT: bb.20.Flow39:
+ ; GFX90A-NEXT: successors: %bb.21(0x40000000), %bb.22(0x40000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.24, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.22, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.23.bb47:
- ; GFX90A-NEXT: successors: %bb.24(0x80000000)
+ ; GFX90A-NEXT: bb.21.bb47:
+ ; GFX90A-NEXT: successors: %bb.22(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.24.Flow40:
- ; GFX90A-NEXT: successors: %bb.25(0x40000000), %bb.26(0x40000000)
+ ; GFX90A-NEXT: bb.22.Flow40:
+ ; GFX90A-NEXT: successors: %bb.23(0x40000000), %bb.24(0x40000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.26, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.24, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.25.bb40:
- ; GFX90A-NEXT: successors: %bb.26(0x80000000)
+ ; GFX90A-NEXT: bb.23.bb40:
+ ; GFX90A-NEXT: successors: %bb.24(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.26.Flow41:
- ; GFX90A-NEXT: successors: %bb.27(0x40000000), %bb.28(0x40000000)
+ ; GFX90A-NEXT: bb.24.Flow41:
+ ; GFX90A-NEXT: successors: %bb.25(0x40000000), %bb.26(0x40000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.28, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.26, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.27.bb33:
- ; GFX90A-NEXT: successors: %bb.28(0x80000000)
+ ; GFX90A-NEXT: bb.25.bb33:
+ ; GFX90A-NEXT: successors: %bb.26(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.28.Flow42:
- ; GFX90A-NEXT: successors: %bb.34(0x40000000), %bb.29(0x40000000)
+ ; GFX90A-NEXT: bb.26.Flow42:
+ ; GFX90A-NEXT: successors: %bb.32(0x40000000), %bb.27(0x40000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.34, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.32, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.29.Flow43:
- ; GFX90A-NEXT: successors: %bb.30(0x40000000), %bb.31(0x40000000)
+ ; GFX90A-NEXT: bb.27.Flow43:
+ ; GFX90A-NEXT: successors: %bb.28(0x40000000), %bb.29(0x40000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.31, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.29, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.30.bb19:
- ; GFX90A-NEXT: successors: %bb.31(0x80000000)
+ ; GFX90A-NEXT: bb.28.bb19:
+ ; GFX90A-NEXT: successors: %bb.29(0x80000000)
; GFX90A-NEXT: liveins: $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.31.Flow44:
- ; GFX90A-NEXT: successors: %bb.32(0x40000000), %bb.33(0x40000000)
+ ; GFX90A-NEXT: bb.29.Flow44:
+ ; GFX90A-NEXT: successors: %bb.30(0x40000000), %bb.31(0x40000000)
; GFX90A-NEXT: liveins: $sgpr54_sgpr55, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr54_sgpr55, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.33, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.31, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.32.UnifiedUnreachableBlock:
- ; GFX90A-NEXT: successors: %bb.33(0x80000000)
+ ; GFX90A-NEXT: bb.30.UnifiedUnreachableBlock:
+ ; GFX90A-NEXT: successors: %bb.31(0x80000000)
; GFX90A-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: SI_MASKED_UNREACHABLE
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.33.UnifiedReturnBlock:
+ ; GFX90A-NEXT: bb.31.UnifiedReturnBlock:
; GFX90A-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: S_ENDPGM 0
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.34.bb26:
- ; GFX90A-NEXT: successors: %bb.29(0x80000000)
+ ; GFX90A-NEXT: bb.32.bb26:
+ ; GFX90A-NEXT: successors: %bb.27(0x80000000)
; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr54_sgpr55, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: S_BRANCH %bb.29
+ ; GFX90A-NEXT: S_BRANCH %bb.27
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.35.bb20:
- ; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
+ ; GFX90A-NEXT: bb.33.bb20:
+ ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.34(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec :: (load (s8) from %ir.i21, addrspace 1)
@@ -387,24 +373,24 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.35, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.36.Flow21:
- ; GFX90A-NEXT: successors: %bb.6(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.34.Flow21:
+ ; GFX90A-NEXT: successors: %bb.4(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def $scc
- ; GFX90A-NEXT: S_BRANCH %bb.6
+ ; GFX90A-NEXT: S_BRANCH %bb.4
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.37.bb27:
- ; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
+ ; GFX90A-NEXT: bb.35.bb27:
+ ; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43, $sgpr56_sgpr57, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr40_sgpr41
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec :: (load (s8) from %ir.i28, addrspace 1)
@@ -426,18 +412,18 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.38.Flow22:
- ; GFX90A-NEXT: successors: %bb.36(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.36.Flow22:
+ ; GFX90A-NEXT: successors: %bb.34(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -454,10 +440,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_ANDN2_B64 killed renamable $sgpr28_sgpr29, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_OR_B64 killed renamable $sgpr28_sgpr29, killed renamable $sgpr54_sgpr55, implicit-def dead $scc
- ; GFX90A-NEXT: S_BRANCH %bb.36
+ ; GFX90A-NEXT: S_BRANCH %bb.34
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.39.bb34:
- ; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
+ ; GFX90A-NEXT: bb.37.bb34:
+ ; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr44_sgpr45
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec :: (load (s8) from %ir.i35, addrspace 1)
@@ -476,18 +462,18 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.40.Flow23:
- ; GFX90A-NEXT: successors: %bb.38(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.38.Flow23:
+ ; GFX90A-NEXT: successors: %bb.36(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr16_sgpr17 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -503,10 +489,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_ANDN2_B64 renamable $sgpr28_sgpr29, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_OR_B64 killed renamable $sgpr38_sgpr39, killed renamable $sgpr40_sgpr41, implicit-def dead $scc
- ; GFX90A-NEXT: S_BRANCH %bb.38
+ ; GFX90A-NEXT: S_BRANCH %bb.36
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.41.bb41:
- ; GFX90A-NEXT: successors: %bb.47(0x40000000), %bb.42(0x40000000)
+ ; GFX90A-NEXT: bb.39.bb41:
+ ; GFX90A-NEXT: successors: %bb.45(0x40000000), %bb.40(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr56_sgpr57, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
@@ -527,18 +513,18 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.47, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.45, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.42.Flow24:
- ; GFX90A-NEXT: successors: %bb.40(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.40.Flow24:
+ ; GFX90A-NEXT: successors: %bb.38(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr18, implicit $exec
@@ -554,10 +540,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr16_sgpr17 = S_ANDN2_B64 renamable $sgpr28_sgpr29, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_OR_B64 killed renamable $sgpr16_sgpr17, killed renamable $sgpr54_sgpr55, implicit-def dead $scc
- ; GFX90A-NEXT: S_BRANCH %bb.40
+ ; GFX90A-NEXT: S_BRANCH %bb.38
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.43.bb55:
- ; GFX90A-NEXT: successors: %bb.49(0x40000000), %bb.44(0x40000000)
+ ; GFX90A-NEXT: bb.41.bb55:
+ ; GFX90A-NEXT: successors: %bb.47(0x40000000), %bb.42(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr52_sgpr53, $sgpr44_sgpr45
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: S_BITCMP1_B32 killed renamable $sgpr33, 16, implicit-def $scc
@@ -567,10 +553,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr46_sgpr47, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.49, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.47, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.44:
- ; GFX90A-NEXT: successors: %bb.45(0x80000000)
+ ; GFX90A-NEXT: bb.42:
+ ; GFX90A-NEXT: successors: %bb.43(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr52_sgpr53
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr28_sgpr29
@@ -581,23 +567,23 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.45.Flow26:
- ; GFX90A-NEXT: successors: %bb.46(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr16, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr60, $vgpr61, $vgpr62, $vgpr63
+ ; GFX90A-NEXT: bb.43.Flow26:
+ ; GFX90A-NEXT: successors: %bb.44(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr16, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr60, $vgpr61, $vgpr62, $vgpr63
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.46.Flow26:
- ; GFX90A-NEXT: successors: %bb.48(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.44.Flow26:
+ ; GFX90A-NEXT: successors: %bb.46(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
@@ -609,10 +595,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr28_sgpr29, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
- ; GFX90A-NEXT: S_BRANCH %bb.48
+ ; GFX90A-NEXT: S_BRANCH %bb.46
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.47.bb48:
- ; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.48(0x40000000)
+ ; GFX90A-NEXT: bb.45.bb48:
+ ; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.46(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr56_sgpr57, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr50_sgpr51, $sgpr44_sgpr45
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
@@ -635,18 +621,18 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.48.Flow25:
- ; GFX90A-NEXT: successors: %bb.42(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.46.Flow25:
+ ; GFX90A-NEXT: successors: %bb.40(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr16_sgpr17, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -660,17 +646,17 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_ANDN2_B64 renamable $sgpr28_sgpr29, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_OR_B64 killed renamable $sgpr42_sgpr43, killed renamable $sgpr54_sgpr55, implicit-def dead $scc
- ; GFX90A-NEXT: S_BRANCH %bb.42
+ ; GFX90A-NEXT: S_BRANCH %bb.40
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.49.bb63:
- ; GFX90A-NEXT: successors: %bb.51(0x40000000), %bb.50(0x40000000)
+ ; GFX90A-NEXT: bb.47.bb63:
+ ; GFX90A-NEXT: successors: %bb.49(0x40000000), %bb.48(0x40000000)
; GFX90A-NEXT: liveins: $vcc, $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr54_sgpr55:0x000000000000000F, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr44_sgpr45, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr52_sgpr53
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.51, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.49, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.50:
- ; GFX90A-NEXT: successors: %bb.45(0x80000000)
+ ; GFX90A-NEXT: bb.48:
+ ; GFX90A-NEXT: successors: %bb.43(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr44_sgpr45, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr52_sgpr53
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr28_sgpr29
@@ -681,26 +667,26 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_BRANCH %bb.45
+ ; GFX90A-NEXT: S_BRANCH %bb.43
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.51.bb68:
- ; GFX90A-NEXT: successors: %bb.55(0x40000000), %bb.52(0x40000000)
+ ; GFX90A-NEXT: bb.49.bb68:
+ ; GFX90A-NEXT: successors: %bb.53(0x40000000), %bb.50(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr54_sgpr55:0x000000000000000F, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr52_sgpr53
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr30, implicit $exec
; GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.55, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.53, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.52:
- ; GFX90A-NEXT: successors: %bb.46(0x80000000)
+ ; GFX90A-NEXT: bb.50:
+ ; GFX90A-NEXT: successors: %bb.44(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr52_sgpr53
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
@@ -711,26 +697,26 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_BRANCH %bb.46
+ ; GFX90A-NEXT: S_BRANCH %bb.44
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.53.bb80:
- ; GFX90A-NEXT: successors: %bb.60(0x40000000), %bb.54(0x40000000)
+ ; GFX90A-NEXT: bb.51.bb80:
+ ; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.52(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr54_sgpr55:0x000000000000000F, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr15 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc
; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr15, 0, implicit-def $scc
; GFX90A-NEXT: renamable $vgpr6 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr48_sgpr49 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.60, implicit killed $scc
+ ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.62, implicit killed $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.54:
- ; GFX90A-NEXT: successors: %bb.62(0x80000000)
+ ; GFX90A-NEXT: bb.52:
+ ; GFX90A-NEXT: successors: %bb.64(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr58_sgpr59, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
@@ -740,16 +726,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_BRANCH %bb.62
+ ; GFX90A-NEXT: S_BRANCH %bb.64
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.55.bb73:
- ; GFX90A-NEXT: successors: %bb.53(0x40000000), %bb.56(0x40000000)
+ ; GFX90A-NEXT: bb.53.bb73:
+ ; GFX90A-NEXT: successors: %bb.51(0x40000000), %bb.54(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr54_sgpr55:0x000000000000000F, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50_sgpr51
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr6 = GLOBAL_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec :: (load (s8) from %ir.i74, addrspace 1)
@@ -765,27 +751,27 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.53, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.51, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.56.Flow29:
- ; GFX90A-NEXT: successors: %bb.46(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.54.Flow29:
+ ; GFX90A-NEXT: successors: %bb.44(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr58_sgpr59, implicit-def $scc
- ; GFX90A-NEXT: S_BRANCH %bb.46
+ ; GFX90A-NEXT: S_BRANCH %bb.44
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.57.bb90:
- ; GFX90A-NEXT: successors: %bb.61(0x80000000)
+ ; GFX90A-NEXT: bb.55.bb90:
+ ; GFX90A-NEXT: successors: %bb.63(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr50_sgpr51, $sgpr54_sgpr55:0x000000000000000F, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr53 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr62_sgpr63, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr51 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr62_sgpr63, implicit $exec
; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr14_vgpr15 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr21, implicit $exec
@@ -794,17 +780,70 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr54, implicit $exec
; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_e64 killed $sgpr55, killed $vgpr10, 1, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr52 = V_ALIGNBIT_B32_e64 $vgpr17, $vgpr16, 1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr50 = V_ALIGNBIT_B32_e64 $vgpr17, $vgpr16, 1, implicit $exec
; GFX90A-NEXT: renamable $vgpr17 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr8_sgpr9, implicit $exec
; GFX90A-NEXT: renamable $vgpr15 = V_ALIGNBIT_B32_e64 $vgpr15, $vgpr14, 1, implicit $exec
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_OR_B64 renamable $sgpr28_sgpr29, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $vgpr14, implicit $exec
- ; GFX90A-NEXT: S_BRANCH %bb.61
+ ; GFX90A-NEXT: S_BRANCH %bb.63
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.56.bb103:
+ ; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.57(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.58, implicit $vcc
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.57:
+ ; GFX90A-NEXT: successors: %bb.59(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
+ ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
+ ; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
+ ; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
+ ; GFX90A-NEXT: S_BRANCH %bb.59
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.58.bb105:
+ ; GFX90A-NEXT: successors: %bb.59(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr23, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.434, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr18_vgpr19 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.435, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr52 = V_ASHRREV_I32_e32 31, $vgpr2, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr23 = S_MOV_B32 0
+ ; GFX90A-NEXT: renamable $sgpr15 = S_MOV_B32 0
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.58:
- ; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $exec, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr42_sgpr43, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.59.Flow18:
+ ; GFX90A-NEXT: successors: %bb.60(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $sgpr33, $vgpr31, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $vgpr25 = COPY renamable $vgpr2, implicit $exec
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.60.Flow17:
+ ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.61(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $sgpr33, $vgpr25, $vgpr31, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
+ ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.61:
+ ; GFX90A-NEXT: successors: %bb.5(0x80000000)
+ ; GFX90A-NEXT: liveins: $exec, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $vgpr25, $vgpr30, $vgpr31, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr42_sgpr43, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr15 = COPY killed renamable $sgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr15, implicit $exec
@@ -831,35 +870,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr40_vgpr41 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr46_vgpr47 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = COPY renamable $vgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr52 = COPY renamable $vgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr50 = COPY renamable $vgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr16 = COPY renamable $vgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr53 = COPY renamable $vgpr17, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr51 = COPY renamable $vgpr17, implicit $exec
; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $vgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr12 = COPY renamable $vgpr15, implicit $exec
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
- ; GFX90A-NEXT: S_BRANCH %bb.7
+ ; GFX90A-NEXT: S_BRANCH %bb.5
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.59.bb105:
- ; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr23, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr20_vgpr21 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.434, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr18_vgpr19 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10_vgpr11 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.435, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr24_vgpr25 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
- ; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr23 = S_MOV_B32 0
- ; GFX90A-NEXT: renamable $sgpr15 = S_MOV_B32 0
- ; GFX90A-NEXT: S_BRANCH %bb.3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.60.bb85:
- ; GFX90A-NEXT: successors: %bb.57(0x40000000), %bb.61(0x40000000)
+ ; GFX90A-NEXT: bb.62.bb85:
+ ; GFX90A-NEXT: successors: %bb.55(0x40000000), %bb.63(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr54_sgpr55:0x000000000000000F, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr8 = V_OR_B32_e32 1, $vgpr6, implicit $exec
@@ -872,24 +892,24 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.57, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.55, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.61.Flow31:
- ; GFX90A-NEXT: successors: %bb.62(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.63.Flow31:
+ ; GFX90A-NEXT: successors: %bb.64(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr50_sgpr51, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.62.Flow30:
- ; GFX90A-NEXT: successors: %bb.56(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.64.Flow30:
+ ; GFX90A-NEXT: successors: %bb.54(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
@@ -897,134 +917,134 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_ANDN2_B64 renamable $sgpr28_sgpr29, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_OR_B64 killed renamable $sgpr48_sgpr49, killed renamable $sgpr54_sgpr55, implicit-def dead $scc
- ; GFX90A-NEXT: S_BRANCH %bb.56
+ ; GFX90A-NEXT: S_BRANCH %bb.54
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.63.bb140:
- ; GFX90A-NEXT: successors: %bb.69(0x40000000), %bb.64(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.65.bb140:
+ ; GFX90A-NEXT: successors: %bb.71(0x40000000), %bb.66(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr25, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr26_sgpr27, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.69, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.64.Flow13:
- ; GFX90A-NEXT: successors: %bb.65(0x40000000), %bb.67(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.66.Flow13:
+ ; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.69(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr28_sgpr29, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.67, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.69, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.65.bb159:
- ; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.66(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.67.bb159:
+ ; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.68(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr30, implicit $exec
; GFX90A-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr8_sgpr9 = S_XOR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.68, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.70, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.66.Flow10:
- ; GFX90A-NEXT: successors: %bb.67(0x80000000)
+ ; GFX90A-NEXT: bb.68.Flow10:
+ ; GFX90A-NEXT: successors: %bb.69(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr8_sgpr9 = S_ANDN2_SAVEEXEC_B64 $sgpr8_sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def $scc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.67.Flow14:
- ; GFX90A-NEXT: successors: %bb.8(0x80000000)
+ ; GFX90A-NEXT: bb.69.Flow14:
+ ; GFX90A-NEXT: successors: %bb.6(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = COPY $exec
- ; GFX90A-NEXT: S_BRANCH %bb.8
+ ; GFX90A-NEXT: S_BRANCH %bb.6
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.68.bb161:
- ; GFX90A-NEXT: successors: %bb.66(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.70.bb161:
+ ; GFX90A-NEXT: successors: %bb.68(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr21, killed $vgpr23, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr21, killed $vgpr52, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr3 = V_OR_B32_e32 killed $vgpr11, killed $vgpr19, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr3, killed $vgpr2, implicit $exec
; GFX90A-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr53, 0, $vgpr3, 0, 0, 6, implicit $exec
+ ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr51, 0, $vgpr3, 0, 0, 6, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 killed $vgpr52, killed $vgpr13, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 killed $vgpr50, killed $vgpr13, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr10, killed $vgpr2, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U32_sdwa 0, killed $vgpr17, 0, $vgpr3, 0, 0, 6, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_CNDMASK_B32_e64 0, 0, 0, killed $vgpr2, killed $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr15, implicit $exec
; GFX90A-NEXT: DS_WRITE2_B32_gfx9 killed renamable $vgpr3, killed renamable $vgpr2, renamable $vgpr3, 0, 1, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, align 4, addrspace 3)
- ; GFX90A-NEXT: S_BRANCH %bb.66
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.69.bb174:
- ; GFX90A-NEXT: successors: %bb.73(0x40000000), %bb.70(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr26 = V_OR_B32_e32 1, $vgpr24, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr26, $vgpr22, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 $vgpr48, $vgpr20, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr28 = V_CNDMASK_B32_e64 0, $vgpr34, 0, 0, $sgpr8_sgpr9, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr28, $vgpr18, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr38, $vgpr10, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr32 = V_OR_B32_e32 $vgpr36, $vgpr12, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr50 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr32, killed $sgpr8_sgpr9, implicit $exec
+ ; GFX90A-NEXT: S_BRANCH %bb.68
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: bb.71.bb174:
+ ; GFX90A-NEXT: successors: %bb.75(0x40000000), %bb.72(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr25, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $vgpr24 = V_OR_B32_e32 1, $vgpr22, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr38 = V_OR_B32_e32 $vgpr24, killed $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr32 = V_OR_B32_e32 $vgpr38, $vgpr20, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr26 = V_CNDMASK_B32_e64 0, $vgpr32, 0, 0, $sgpr8_sgpr9, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr36 = V_OR_B32_e32 $vgpr26, $vgpr18, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 $vgpr36, $vgpr10, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr28 = V_OR_B32_e32 $vgpr34, $vgpr12, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr48 = V_CNDMASK_B32_e64 0, 0, 0, $vgpr28, killed $sgpr8_sgpr9, implicit $exec
; GFX90A-NEXT: renamable $sgpr8_sgpr9 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.73, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.75, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.70.Flow:
- ; GFX90A-NEXT: successors: %bb.71(0x40000000), %bb.72(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.72.Flow:
+ ; GFX90A-NEXT: successors: %bb.73(0x40000000), %bb.74(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.72, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.74, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.71.bb186:
- ; GFX90A-NEXT: successors: %bb.72(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.73.bb186:
+ ; GFX90A-NEXT: successors: %bb.74(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 3, killed $vgpr2_vgpr3, implicit $exec
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr19, implicit $exec
; GFX90A-NEXT: renamable $vgpr2, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr18, $vgpr2, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr3, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr10, killed $vgpr3, killed $vcc, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr27 = V_MOV_B32_e32 0, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr49 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr35 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr39 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr37 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr29 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr51 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr33 = COPY renamable $vgpr27, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, renamable $vgpr26_vgpr27, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: renamable $vgpr25 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr39 = COPY renamable $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr33 = COPY renamable $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr37 = COPY renamable $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr35 = COPY renamable $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr27 = COPY renamable $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr49 = COPY renamable $vgpr25, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr29 = COPY renamable $vgpr25, implicit $exec
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr25, renamable $vgpr24_vgpr25, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr21, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
; GFX90A-NEXT: renamable $vgpr12 = COPY killed renamable $sgpr22, implicit $exec
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr12, killed renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into %ir.8, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, killed renamable $vgpr38_vgpr39, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr27, killed renamable $vgpr28_vgpr29, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr10, killed renamable $vgpr50_vgpr51, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
- ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr27, killed renamable $vgpr32_vgpr33, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr12, killed renamable $vgpr32_vgpr33, 0, 0, implicit $exec :: (store (s64) into %ir.8, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr25, killed renamable $vgpr36_vgpr37, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr10, killed renamable $vgpr34_vgpr35, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 renamable $vgpr25, killed renamable $vgpr26_vgpr27, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr10, killed renamable $vgpr48_vgpr49, 0, 0, implicit $exec :: (store (s64) into %ir.7, addrspace 3)
+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr25, killed renamable $vgpr28_vgpr29, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.72.Flow9:
- ; GFX90A-NEXT: successors: %bb.64(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.74.Flow9:
+ ; GFX90A-NEXT: successors: %bb.66(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
- ; GFX90A-NEXT: S_BRANCH %bb.64
+ ; GFX90A-NEXT: S_BRANCH %bb.66
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.73.bb196:
- ; GFX90A-NEXT: successors: %bb.70(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.75.bb196:
+ ; GFX90A-NEXT: successors: %bb.72(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr50, killed $vgpr16, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr48, killed $vgpr16, implicit $exec
; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr10, killed $vgpr14, implicit $exec
; GFX90A-NEXT: renamable $vgpr55 = V_MOV_B32_e32 0, implicit $exec
; GFX90A-NEXT: DS_WRITE_B64_gfx9 killed renamable $vgpr55, renamable $vgpr54_vgpr55, 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(3) null`, addrspace 3)
; GFX90A-NEXT: renamable $sgpr8_sgpr9 = S_MOV_B64 0
- ; GFX90A-NEXT: S_BRANCH %bb.70
+ ; GFX90A-NEXT: S_BRANCH %bb.72
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i11 = icmp eq i32 %i, 0
diff --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index 8435e016096621..21ac6a37751d87 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -27,9 +27,9 @@ define void @generic_weak(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
; CHECK-LABEL: generic_weak(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<29>;
-; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .b32 %r<35>;
; CHECK-NEXT: .reg .f32 %f<15>;
-; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .b64 %rd<25>;
; CHECK-NEXT: .reg .f64 %fd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -79,9 +79,7 @@ define void @generic_weak(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
-; CHECK-NEXT: st.u32 [%rd3], %r14;
-; CHECK-NEXT: ld.u32 %r15, [%rd3];
-; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r14;
; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
@@ -89,34 +87,52 @@ define void @generic_weak(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
; CHECK-NEXT: ld.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: mov.b32 %r17, {%rs26, %rs25};
; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
-; CHECK-NEXT: st.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
-; CHECK-NEXT: ld.v2.u32 {%r17, %r18}, [%rd4];
-; CHECK-NEXT: add.s32 %r19, %r18, 1;
-; CHECK-NEXT: add.s32 %r20, %r17, 1;
-; CHECK-NEXT: st.v2.u32 [%rd4], {%r20, %r19};
-; CHECK-NEXT: ld.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
-; CHECK-NEXT: add.s32 %r25, %r24, 1;
-; CHECK-NEXT: add.s32 %r26, %r23, 1;
-; CHECK-NEXT: add.s32 %r27, %r22, 1;
-; CHECK-NEXT: add.s32 %r28, %r21, 1;
-; CHECK-NEXT: st.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
-; CHECK-NEXT: ld.v2.u64 {%rd7, %rd8}, [%rd4];
-; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
-; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
-; CHECK-NEXT: st.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: mov.b32 %r18, {%rs28, %rs27};
+; CHECK-NEXT: add.s32 %r21, %r17, 1;
+; CHECK-NEXT: add.s32 %r22, %r18, 1;
+; CHECK-NEXT: st.v2.u32 [%rd4], {%r22, %r21};
+; CHECK-NEXT: ld.v4.u32 {%r23, %r24, %r25, %r26}, [%rd4];
+; CHECK-NEXT: add.s32 %r27, %r23, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd7, %r27;
+; CHECK-NEXT: add.s32 %r28, %r24, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd8, %r28;
+; CHECK-NEXT: shl.b64 %rd9, %rd8, 32;
+; CHECK-NEXT: or.b64 %rd10, %rd7, %rd9;
+; CHECK-NEXT: add.s32 %r29, %r25, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd11, %r29;
+; CHECK-NEXT: add.s32 %r30, %r26, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd12, %r30;
+; CHECK-NEXT: shl.b64 %rd13, %rd12, 32;
+; CHECK-NEXT: or.b64 %rd14, %rd11, %rd13;
+; CHECK-NEXT: add.s64 %rd15, %rd14, 1;
+; CHECK-NEXT: add.s64 %rd16, %rd10, 1;
+; CHECK-NEXT: st.v2.u64 [%rd4], {%rd16, %rd15};
; CHECK-NEXT: ld.v2.f32 {%f3, %f4}, [%rd4];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: st.v2.f32 [%rd4], {%f6, %f5};
; CHECK-NEXT: ld.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
-; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
-; CHECK-NEXT: st.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
-; CHECK-NEXT: ld.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f9, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r31, %f11;
+; CHECK-NEXT: cvt.u64.u32 %rd17, %r31;
+; CHECK-NEXT: add.rn.f32 %f12, %f10, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r32, %f12;
+; CHECK-NEXT: cvt.u64.u32 %rd18, %r32;
+; CHECK-NEXT: shl.b64 %rd19, %rd18, 32;
+; CHECK-NEXT: or.b64 %rd20, %rd17, %rd19;
+; CHECK-NEXT: add.rn.f32 %f13, %f7, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r33, %f13;
+; CHECK-NEXT: cvt.u64.u32 %rd21, %r33;
+; CHECK-NEXT: add.rn.f32 %f14, %f8, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r34, %f14;
+; CHECK-NEXT: cvt.u64.u32 %rd22, %r34;
+; CHECK-NEXT: shl.b64 %rd23, %rd22, 32;
+; CHECK-NEXT: or.b64 %rd24, %rd21, %rd23;
+; CHECK-NEXT: mov.b64 %fd3, %rd24;
+; CHECK-NEXT: mov.b64 %fd4, %rd20;
; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
; CHECK-NEXT: st.v2.f64 [%rd4], {%fd6, %fd5};
@@ -696,9 +712,9 @@ define void @global_weak(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace
; CHECK-LABEL: global_weak(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<29>;
-; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .b32 %r<35>;
; CHECK-NEXT: .reg .f32 %f<15>;
-; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .b64 %rd<25>;
; CHECK-NEXT: .reg .f64 %fd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -748,9 +764,7 @@ define void @global_weak(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace
; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
-; CHECK-NEXT: st.global.u32 [%rd3], %r14;
-; CHECK-NEXT: ld.global.u32 %r15, [%rd3];
-; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r14;
; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
@@ -758,34 +772,52 @@ define void @global_weak(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace
; CHECK-NEXT: ld.global.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: mov.b32 %r17, {%rs26, %rs25};
; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
-; CHECK-NEXT: st.global.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
-; CHECK-NEXT: ld.global.v2.u32 {%r17, %r18}, [%rd4];
-; CHECK-NEXT: add.s32 %r19, %r18, 1;
-; CHECK-NEXT: add.s32 %r20, %r17, 1;
-; CHECK-NEXT: st.global.v2.u32 [%rd4], {%r20, %r19};
-; CHECK-NEXT: ld.global.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
-; CHECK-NEXT: add.s32 %r25, %r24, 1;
-; CHECK-NEXT: add.s32 %r26, %r23, 1;
-; CHECK-NEXT: add.s32 %r27, %r22, 1;
-; CHECK-NEXT: add.s32 %r28, %r21, 1;
-; CHECK-NEXT: st.global.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
-; CHECK-NEXT: ld.global.v2.u64 {%rd7, %rd8}, [%rd4];
-; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
-; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
-; CHECK-NEXT: st.global.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: mov.b32 %r18, {%rs28, %rs27};
+; CHECK-NEXT: add.s32 %r21, %r17, 1;
+; CHECK-NEXT: add.s32 %r22, %r18, 1;
+; CHECK-NEXT: st.global.v2.u32 [%rd4], {%r22, %r21};
+; CHECK-NEXT: ld.global.v4.u32 {%r23, %r24, %r25, %r26}, [%rd4];
+; CHECK-NEXT: add.s32 %r27, %r23, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd7, %r27;
+; CHECK-NEXT: add.s32 %r28, %r24, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd8, %r28;
+; CHECK-NEXT: shl.b64 %rd9, %rd8, 32;
+; CHECK-NEXT: or.b64 %rd10, %rd7, %rd9;
+; CHECK-NEXT: add.s32 %r29, %r25, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd11, %r29;
+; CHECK-NEXT: add.s32 %r30, %r26, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd12, %r30;
+; CHECK-NEXT: shl.b64 %rd13, %rd12, 32;
+; CHECK-NEXT: or.b64 %rd14, %rd11, %rd13;
+; CHECK-NEXT: add.s64 %rd15, %rd14, 1;
+; CHECK-NEXT: add.s64 %rd16, %rd10, 1;
+; CHECK-NEXT: st.global.v2.u64 [%rd4], {%rd16, %rd15};
; CHECK-NEXT: ld.global.v2.f32 {%f3, %f4}, [%rd4];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: st.global.v2.f32 [%rd4], {%f6, %f5};
; CHECK-NEXT: ld.global.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
-; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
-; CHECK-NEXT: st.global.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
-; CHECK-NEXT: ld.global.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f9, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r31, %f11;
+; CHECK-NEXT: cvt.u64.u32 %rd17, %r31;
+; CHECK-NEXT: add.rn.f32 %f12, %f10, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r32, %f12;
+; CHECK-NEXT: cvt.u64.u32 %rd18, %r32;
+; CHECK-NEXT: shl.b64 %rd19, %rd18, 32;
+; CHECK-NEXT: or.b64 %rd20, %rd17, %rd19;
+; CHECK-NEXT: add.rn.f32 %f13, %f7, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r33, %f13;
+; CHECK-NEXT: cvt.u64.u32 %rd21, %r33;
+; CHECK-NEXT: add.rn.f32 %f14, %f8, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r34, %f14;
+; CHECK-NEXT: cvt.u64.u32 %rd22, %r34;
+; CHECK-NEXT: shl.b64 %rd23, %rd22, 32;
+; CHECK-NEXT: or.b64 %rd24, %rd21, %rd23;
+; CHECK-NEXT: mov.b64 %fd3, %rd24;
+; CHECK-NEXT: mov.b64 %fd4, %rd20;
; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
; CHECK-NEXT: st.global.v2.f64 [%rd4], {%fd6, %fd5};
@@ -1408,9 +1440,9 @@ define void @shared_weak(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace
; CHECK-LABEL: shared_weak(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<29>;
-; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .b32 %r<35>;
; CHECK-NEXT: .reg .f32 %f<15>;
-; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .b64 %rd<25>;
; CHECK-NEXT: .reg .f64 %fd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -1460,9 +1492,7 @@ define void @shared_weak(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace
; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
-; CHECK-NEXT: st.shared.u32 [%rd3], %r14;
-; CHECK-NEXT: ld.shared.u32 %r15, [%rd3];
-; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r14;
; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
@@ -1470,34 +1500,52 @@ define void @shared_weak(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace
; CHECK-NEXT: ld.shared.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: mov.b32 %r17, {%rs26, %rs25};
; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
-; CHECK-NEXT: st.shared.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
-; CHECK-NEXT: ld.shared.v2.u32 {%r17, %r18}, [%rd4];
-; CHECK-NEXT: add.s32 %r19, %r18, 1;
-; CHECK-NEXT: add.s32 %r20, %r17, 1;
-; CHECK-NEXT: st.shared.v2.u32 [%rd4], {%r20, %r19};
-; CHECK-NEXT: ld.shared.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
-; CHECK-NEXT: add.s32 %r25, %r24, 1;
-; CHECK-NEXT: add.s32 %r26, %r23, 1;
-; CHECK-NEXT: add.s32 %r27, %r22, 1;
-; CHECK-NEXT: add.s32 %r28, %r21, 1;
-; CHECK-NEXT: st.shared.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
-; CHECK-NEXT: ld.shared.v2.u64 {%rd7, %rd8}, [%rd4];
-; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
-; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
-; CHECK-NEXT: st.shared.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: mov.b32 %r18, {%rs28, %rs27};
+; CHECK-NEXT: add.s32 %r21, %r17, 1;
+; CHECK-NEXT: add.s32 %r22, %r18, 1;
+; CHECK-NEXT: st.shared.v2.u32 [%rd4], {%r22, %r21};
+; CHECK-NEXT: ld.shared.v4.u32 {%r23, %r24, %r25, %r26}, [%rd4];
+; CHECK-NEXT: add.s32 %r27, %r23, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd7, %r27;
+; CHECK-NEXT: add.s32 %r28, %r24, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd8, %r28;
+; CHECK-NEXT: shl.b64 %rd9, %rd8, 32;
+; CHECK-NEXT: or.b64 %rd10, %rd7, %rd9;
+; CHECK-NEXT: add.s32 %r29, %r25, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd11, %r29;
+; CHECK-NEXT: add.s32 %r30, %r26, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd12, %r30;
+; CHECK-NEXT: shl.b64 %rd13, %rd12, 32;
+; CHECK-NEXT: or.b64 %rd14, %rd11, %rd13;
+; CHECK-NEXT: add.s64 %rd15, %rd14, 1;
+; CHECK-NEXT: add.s64 %rd16, %rd10, 1;
+; CHECK-NEXT: st.shared.v2.u64 [%rd4], {%rd16, %rd15};
; CHECK-NEXT: ld.shared.v2.f32 {%f3, %f4}, [%rd4];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: st.shared.v2.f32 [%rd4], {%f6, %f5};
; CHECK-NEXT: ld.shared.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
-; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
-; CHECK-NEXT: st.shared.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
-; CHECK-NEXT: ld.shared.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f9, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r31, %f11;
+; CHECK-NEXT: cvt.u64.u32 %rd17, %r31;
+; CHECK-NEXT: add.rn.f32 %f12, %f10, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r32, %f12;
+; CHECK-NEXT: cvt.u64.u32 %rd18, %r32;
+; CHECK-NEXT: shl.b64 %rd19, %rd18, 32;
+; CHECK-NEXT: or.b64 %rd20, %rd17, %rd19;
+; CHECK-NEXT: add.rn.f32 %f13, %f7, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r33, %f13;
+; CHECK-NEXT: cvt.u64.u32 %rd21, %r33;
+; CHECK-NEXT: add.rn.f32 %f14, %f8, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r34, %f14;
+; CHECK-NEXT: cvt.u64.u32 %rd22, %r34;
+; CHECK-NEXT: shl.b64 %rd23, %rd22, 32;
+; CHECK-NEXT: or.b64 %rd24, %rd21, %rd23;
+; CHECK-NEXT: mov.b64 %fd3, %rd24;
+; CHECK-NEXT: mov.b64 %fd4, %rd20;
; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
; CHECK-NEXT: st.shared.v2.f64 [%rd4], {%fd6, %fd5};
@@ -2052,9 +2100,9 @@ define void @local_weak(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(
; CHECK-LABEL: local_weak(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<29>;
-; CHECK-NEXT: .reg .b32 %r<29>;
+; CHECK-NEXT: .reg .b32 %r<35>;
; CHECK-NEXT: .reg .f32 %f<15>;
-; CHECK-NEXT: .reg .b64 %rd<11>;
+; CHECK-NEXT: .reg .b64 %rd<25>;
; CHECK-NEXT: .reg .f64 %fd<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -2104,9 +2152,7 @@ define void @local_weak(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(
; CHECK-NEXT: add.s16 %rs16, %rs15, 1;
; CHECK-NEXT: cvt.u32.u16 %r13, %rs16;
; CHECK-NEXT: bfi.b32 %r14, %r13, %r11, 24, 8;
-; CHECK-NEXT: st.local.u32 [%rd3], %r14;
-; CHECK-NEXT: ld.local.u32 %r15, [%rd3];
-; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r15;
+; CHECK-NEXT: mov.b32 {%rs17, %rs18}, %r14;
; CHECK-NEXT: add.s16 %rs19, %rs18, 1;
; CHECK-NEXT: add.s16 %rs20, %rs17, 1;
; CHECK-NEXT: mov.b32 %r16, {%rs20, %rs19};
@@ -2114,34 +2160,52 @@ define void @local_weak(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(
; CHECK-NEXT: ld.local.v4.u16 {%rs21, %rs22, %rs23, %rs24}, [%rd4];
; CHECK-NEXT: add.s16 %rs25, %rs24, 1;
; CHECK-NEXT: add.s16 %rs26, %rs23, 1;
+; CHECK-NEXT: mov.b32 %r17, {%rs26, %rs25};
; CHECK-NEXT: add.s16 %rs27, %rs22, 1;
; CHECK-NEXT: add.s16 %rs28, %rs21, 1;
-; CHECK-NEXT: st.local.v4.u16 [%rd4], {%rs28, %rs27, %rs26, %rs25};
-; CHECK-NEXT: ld.local.v2.u32 {%r17, %r18}, [%rd4];
-; CHECK-NEXT: add.s32 %r19, %r18, 1;
-; CHECK-NEXT: add.s32 %r20, %r17, 1;
-; CHECK-NEXT: st.local.v2.u32 [%rd4], {%r20, %r19};
-; CHECK-NEXT: ld.local.v4.u32 {%r21, %r22, %r23, %r24}, [%rd4];
-; CHECK-NEXT: add.s32 %r25, %r24, 1;
-; CHECK-NEXT: add.s32 %r26, %r23, 1;
-; CHECK-NEXT: add.s32 %r27, %r22, 1;
-; CHECK-NEXT: add.s32 %r28, %r21, 1;
-; CHECK-NEXT: st.local.v4.u32 [%rd4], {%r28, %r27, %r26, %r25};
-; CHECK-NEXT: ld.local.v2.u64 {%rd7, %rd8}, [%rd4];
-; CHECK-NEXT: add.s64 %rd9, %rd8, 1;
-; CHECK-NEXT: add.s64 %rd10, %rd7, 1;
-; CHECK-NEXT: st.local.v2.u64 [%rd4], {%rd10, %rd9};
+; CHECK-NEXT: mov.b32 %r18, {%rs28, %rs27};
+; CHECK-NEXT: add.s32 %r21, %r17, 1;
+; CHECK-NEXT: add.s32 %r22, %r18, 1;
+; CHECK-NEXT: st.local.v2.u32 [%rd4], {%r22, %r21};
+; CHECK-NEXT: ld.local.v4.u32 {%r23, %r24, %r25, %r26}, [%rd4];
+; CHECK-NEXT: add.s32 %r27, %r23, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd7, %r27;
+; CHECK-NEXT: add.s32 %r28, %r24, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd8, %r28;
+; CHECK-NEXT: shl.b64 %rd9, %rd8, 32;
+; CHECK-NEXT: or.b64 %rd10, %rd7, %rd9;
+; CHECK-NEXT: add.s32 %r29, %r25, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd11, %r29;
+; CHECK-NEXT: add.s32 %r30, %r26, 1;
+; CHECK-NEXT: cvt.u64.u32 %rd12, %r30;
+; CHECK-NEXT: shl.b64 %rd13, %rd12, 32;
+; CHECK-NEXT: or.b64 %rd14, %rd11, %rd13;
+; CHECK-NEXT: add.s64 %rd15, %rd14, 1;
+; CHECK-NEXT: add.s64 %rd16, %rd10, 1;
+; CHECK-NEXT: st.local.v2.u64 [%rd4], {%rd16, %rd15};
; CHECK-NEXT: ld.local.v2.f32 {%f3, %f4}, [%rd4];
; CHECK-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECK-NEXT: add.rn.f32 %f6, %f3, 0f3F800000;
; CHECK-NEXT: st.local.v2.f32 [%rd4], {%f6, %f5};
; CHECK-NEXT: ld.local.v4.f32 {%f7, %f8, %f9, %f10}, [%rd4];
-; CHECK-NEXT: add.rn.f32 %f11, %f10, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f12, %f9, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f13, %f8, 0f3F800000;
-; CHECK-NEXT: add.rn.f32 %f14, %f7, 0f3F800000;
-; CHECK-NEXT: st.local.v4.f32 [%rd4], {%f14, %f13, %f12, %f11};
-; CHECK-NEXT: ld.local.v2.f64 {%fd3, %fd4}, [%rd4];
+; CHECK-NEXT: add.rn.f32 %f11, %f9, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r31, %f11;
+; CHECK-NEXT: cvt.u64.u32 %rd17, %r31;
+; CHECK-NEXT: add.rn.f32 %f12, %f10, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r32, %f12;
+; CHECK-NEXT: cvt.u64.u32 %rd18, %r32;
+; CHECK-NEXT: shl.b64 %rd19, %rd18, 32;
+; CHECK-NEXT: or.b64 %rd20, %rd17, %rd19;
+; CHECK-NEXT: add.rn.f32 %f13, %f7, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r33, %f13;
+; CHECK-NEXT: cvt.u64.u32 %rd21, %r33;
+; CHECK-NEXT: add.rn.f32 %f14, %f8, 0f3F800000;
+; CHECK-NEXT: mov.b32 %r34, %f14;
+; CHECK-NEXT: cvt.u64.u32 %rd22, %r34;
+; CHECK-NEXT: shl.b64 %rd23, %rd22, 32;
+; CHECK-NEXT: or.b64 %rd24, %rd21, %rd23;
+; CHECK-NEXT: mov.b64 %fd3, %rd24;
+; CHECK-NEXT: mov.b64 %fd4, %rd20;
; CHECK-NEXT: add.rn.f64 %fd5, %fd4, 0d3FF0000000000000;
; CHECK-NEXT: add.rn.f64 %fd6, %fd3, 0d3FF0000000000000;
; CHECK-NEXT: st.local.v2.f64 [%rd4], {%fd6, %fd5};
diff --git a/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll b/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
index 5bd3580f5e95ec..3216f3c548308e 100644
--- a/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
+++ b/llvm/test/CodeGen/PowerPC/big-endian-store-forward.ll
@@ -10,7 +10,6 @@ define void @f(i16 %v) {
; CHECK-NEXT: addis 4, 2, .LC0 at toc@ha
; CHECK-NEXT: sth 3, -2(1)
; CHECK-NEXT: ld 4, .LC0 at toc@l(4)
-; CHECK-NEXT: lbz 3, -2(1)
; CHECK-NEXT: stb 3, 0(4)
; CHECK-NEXT: blr
%p32 = alloca i16
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
index ac9641ff35b0cb..728e5431217aa0 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
@@ -31,28 +31,22 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
; CHECK-NEXT: cmpdi cr1, r4, 0
; CHECK-NEXT: cmpdi cr5, r5, 0
; CHECK-NEXT: cmpldi cr6, r3, 0
-; CHECK-NEXT: beq cr6, .LBB0_3
-; CHECK-NEXT: # %bb.1: # %bb10
-; CHECK-NEXT: lwz r3, 0(r3)
-; CHECK-NEXT: bc 12, 4*cr1+eq, .LBB0_4
-; CHECK-NEXT: .LBB0_2: # %bb14
+; CHECK-NEXT: bc 12, 4*cr1+eq, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %bb14
; CHECK-NEXT: lwz r5, 0(r3)
-; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: # implicit-def: $r3
-; CHECK-NEXT: bc 4, 4*cr1+eq, .LBB0_2
-; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: # implicit-def: $r5
-; CHECK-NEXT: .LBB0_5: # %bb16
+; CHECK-NEXT: .LBB0_3: # %bb16
; CHECK-NEXT: crnot 4*cr1+lt, eq
; CHECK-NEXT: crnot 4*cr5+un, 4*cr5+eq
-; CHECK-NEXT: bc 12, 4*cr5+eq, .LBB0_7
-; CHECK-NEXT: # %bb.6: # %bb18
+; CHECK-NEXT: bc 12, 4*cr5+eq, .LBB0_5
+; CHECK-NEXT: # %bb.4: # %bb18
; CHECK-NEXT: lwz r4, 0(r3)
-; CHECK-NEXT: b .LBB0_8
-; CHECK-NEXT: .LBB0_7:
+; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: .LBB0_5:
; CHECK-NEXT: # implicit-def: $r4
-; CHECK-NEXT: .LBB0_8: # %bb20
+; CHECK-NEXT: .LBB0_6: # %bb20
; CHECK-NEXT: mfcr r12
; CHECK-NEXT: cmpwi cr2, r3, -1
; CHECK-NEXT: cmpwi cr3, r4, -1
@@ -62,38 +56,38 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
; CHECK-NEXT: crand 4*cr5+gt, 4*cr2+gt, 4*cr1+lt
; CHECK-NEXT: crand 4*cr5+lt, 4*cr3+gt, 4*cr5+un
; CHECK-NEXT: # implicit-def: $x3
-; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10
-; CHECK-NEXT: # %bb.9: # %bb34
+; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_8
+; CHECK-NEXT: # %bb.7: # %bb34
; CHECK-NEXT: ld r3, 0(r3)
-; CHECK-NEXT: .LBB0_10: # %bb36
+; CHECK-NEXT: .LBB0_8: # %bb36
; CHECK-NEXT: cmpwi cr2, r5, 0
; CHECK-NEXT: # implicit-def: $x4
-; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_12
-; CHECK-NEXT: # %bb.11: # %bb38
+; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10
+; CHECK-NEXT: # %bb.9: # %bb38
; CHECK-NEXT: ld r4, 0(r3)
-; CHECK-NEXT: .LBB0_12: # %bb40
+; CHECK-NEXT: .LBB0_10: # %bb40
; CHECK-NEXT: crand 4*cr6+gt, 4*cr7+lt, 4*cr1+lt
; CHECK-NEXT: crand 4*cr6+lt, 4*cr6+lt, 4*cr5+un
; CHECK-NEXT: crnot 4*cr6+un, 4*cr1+eq
; CHECK-NEXT: # implicit-def: $x6
-; CHECK-NEXT: bc 4, 4*cr6+lt, .LBB0_14
-; CHECK-NEXT: # %bb.13: # %bb48
+; CHECK-NEXT: bc 4, 4*cr6+lt, .LBB0_12
+; CHECK-NEXT: # %bb.11: # %bb48
; CHECK-NEXT: ld r6, 0(r3)
-; CHECK-NEXT: .LBB0_14: # %bb50
+; CHECK-NEXT: .LBB0_12: # %bb50
; CHECK-NEXT: cmpwi cr3, r5, -1
; CHECK-NEXT: crand 4*cr7+lt, 4*cr2+lt, 4*cr6+un
; CHECK-NEXT: # implicit-def: $r5
-; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_16
-; CHECK-NEXT: # %bb.15: # %bb52
+; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_14
+; CHECK-NEXT: # %bb.13: # %bb52
; CHECK-NEXT: lwz r5, 0(r3)
-; CHECK-NEXT: .LBB0_16: # %bb54
+; CHECK-NEXT: .LBB0_14: # %bb54
; CHECK-NEXT: mfocrf r7, 128
; CHECK-NEXT: stw r7, -4(r1)
; CHECK-NEXT: # implicit-def: $r7
-; CHECK-NEXT: bc 4, 4*cr7+lt, .LBB0_18
-; CHECK-NEXT: # %bb.17: # %bb56
+; CHECK-NEXT: bc 4, 4*cr7+lt, .LBB0_16
+; CHECK-NEXT: # %bb.15: # %bb56
; CHECK-NEXT: lwz r7, 0(r3)
-; CHECK-NEXT: .LBB0_18: # %bb58
+; CHECK-NEXT: .LBB0_16: # %bb58
; CHECK-NEXT: lwz r6, 92(r6)
; CHECK-NEXT: crand 4*cr7+un, 4*cr3+gt, 4*cr6+un
; CHECK-NEXT: cmpwi cr3, r5, 1
@@ -106,10 +100,10 @@ define dso_local double @P10_Spill_CR_EQ(ptr %arg) local_unnamed_addr #0 {
; CHECK-NEXT: crand 4*cr7+lt, 4*cr4+lt, 4*cr7+lt
; CHECK-NEXT: cmpwi r6, 1
; CHECK-NEXT: crand 4*cr6+lt, lt, 4*cr6+lt
-; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_20
-; CHECK-NEXT: # %bb.19: # %bb68
+; CHECK-NEXT: bc 4, 4*cr6+gt, .LBB0_18
+; CHECK-NEXT: # %bb.17: # %bb68
; CHECK-NEXT: ld r5, 0(r3)
-; CHECK-NEXT: .LBB0_20: # %bb70
+; CHECK-NEXT: .LBB0_18: # %bb70
; CHECK-NEXT: ld r6, 0(r3)
; CHECK-NEXT: lwz r9, -4(r1)
; CHECK-NEXT: crandc 4*cr5+gt, 4*cr5+gt, 4*cr7+eq
diff --git a/llvm/test/Transforms/EarlyCSE/invariant.start.ll b/llvm/test/Transforms/EarlyCSE/invariant.start.ll
index 554d3ce519b5ee..ad25137d20f466 100644
--- a/llvm/test/Transforms/EarlyCSE/invariant.start.ll
+++ b/llvm/test/Transforms/EarlyCSE/invariant.start.ll
@@ -472,15 +472,22 @@ define void @test_dse_after_load(ptr %p, i1 %cnd) {
; typed due to the user of a Value to represent the address. Note that other
; passes will canonicalize away the bitcasts in this example.
define i32 @test_false_negative_types(ptr %p) {
-; CHECK-LABEL: define {{[^@]+}}@test_false_negative_types
-; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[P]])
-; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT: call void @clobber()
-; CHECK-NEXT: [[V2F:%.*]] = load float, ptr [[P]], align 4
-; CHECK-NEXT: [[V2:%.*]] = bitcast float [[V2F]] to i32
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[V1]], [[V2]]
-; CHECK-NEXT: ret i32 [[SUB]]
+; NO_ASSUME-LABEL: define {{[^@]+}}@test_false_negative_types
+; NO_ASSUME-SAME: (ptr [[P:%.*]]) {
+; NO_ASSUME-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[P]])
+; NO_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
+; NO_ASSUME-NEXT: [[TMP2:%.*]] = bitcast i32 [[V1]] to float
+; NO_ASSUME-NEXT: call void @clobber()
+; NO_ASSUME-NEXT: ret i32 0
+;
+; USE_ASSUME-LABEL: define {{[^@]+}}@test_false_negative_types
+; USE_ASSUME-SAME: (ptr [[P:%.*]]) {
+; USE_ASSUME-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[P]])
+; USE_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4
+; USE_ASSUME-NEXT: [[TMP2:%.*]] = bitcast i32 [[V1]] to float
+; USE_ASSUME-NEXT: call void @clobber()
+; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 4), "nonnull"(ptr [[P]]), "align"(ptr [[P]], i64 4) ]
+; USE_ASSUME-NEXT: ret i32 0
;
call ptr @llvm.invariant.start.p0(i64 4, ptr %p)
%v1 = load i32, ptr %p
@@ -571,13 +578,13 @@ define i32 @test_false_negative_scope(ptr %p) {
define i32 @test_invariant_load_scope(ptr %p) {
; NO_ASSUME-LABEL: define {{[^@]+}}@test_invariant_load_scope
; NO_ASSUME-SAME: (ptr [[P:%.*]]) {
-; NO_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load !4
+; NO_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META4:![0-9]+]]
; NO_ASSUME-NEXT: call void @clobber()
; NO_ASSUME-NEXT: ret i32 0
;
; USE_ASSUME-LABEL: define {{[^@]+}}@test_invariant_load_scope
; USE_ASSUME-SAME: (ptr [[P:%.*]]) {
-; USE_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load !4
+; USE_ASSUME-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META4:![0-9]+]]
; USE_ASSUME-NEXT: call void @clobber()
; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 4), "nonnull"(ptr [[P]]), "align"(ptr [[P]], i64 4) ]
; USE_ASSUME-NEXT: ret i32 0
@@ -589,7 +596,6 @@ define i32 @test_invariant_load_scope(ptr %p) {
ret i32 %sub
}
-; USE_ASSUME: declare void @llvm.assume(i1 noundef)
!0 = !{!1, !1, i64 0}
!1 = !{!"float", !2, i64 0}
diff --git a/llvm/test/Transforms/EarlyCSE/opaque-ptr.ll b/llvm/test/Transforms/EarlyCSE/opaque-ptr.ll
index da507f13730e87..b7283ab4b30833 100644
--- a/llvm/test/Transforms/EarlyCSE/opaque-ptr.ll
+++ b/llvm/test/Transforms/EarlyCSE/opaque-ptr.ll
@@ -4,10 +4,8 @@
define i32 @different_types_load(ptr %p) {
; CHECK-LABEL: @different_types_load(
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[P]], align 4
-; CHECK-NEXT: [[V2_C:%.*]] = trunc i64 [[V2]] to i32
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[V1]], [[V2_C]]
-; CHECK-NEXT: ret i32 [[SUB]]
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[V1]] to i64
+; CHECK-NEXT: ret i32 0
;
%v1 = load i32, ptr %p
%v2 = load i64, ptr %p
@@ -36,10 +34,8 @@ define i32 @different_types_vector_load(ptr %p) {
define i32 @different_types_store(ptr %p, i32 %a) {
; CHECK-LABEL: @different_types_store(
; CHECK-NEXT: store i32 [[A:%.*]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[P]], align 4
-; CHECK-NEXT: [[V2_C:%.*]] = trunc i64 [[V2]] to i32
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A]], [[V2_C]]
-; CHECK-NEXT: ret i32 [[SUB]]
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT: ret i32 0
;
store i32 %a, ptr %p
%v2 = load i64, ptr %p
@@ -51,7 +47,7 @@ define i32 @different_types_store(ptr %p, i32 %a) {
define i32 @different_elt_types_vector_load(ptr %p, <4 x i1> %c) {
; CHECK-LABEL: @different_elt_types_vector_load(
; CHECK-NEXT: [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[P:%.*]], i32 4, <4 x i1> [[C:%.*]], <4 x i32> poison)
-; CHECK-NEXT: [[V2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> [[C]], <4 x float> poison)
+; CHECK-NEXT: [[V2:%.*]] = bitcast <4 x i32> [[V1]] to <4 x float>
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[V1]], i32 0
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x float> [[V2]], i32 0
; CHECK-NEXT: [[E2I:%.*]] = fptosi float [[E2]] to i32
@@ -70,7 +66,7 @@ define i32 @different_elt_types_vector_load(ptr %p, <4 x i1> %c) {
define float @different_elt_types_vector_store_load(ptr %p, <4 x i32> %v1, <4 x i1> %c) {
; CHECK-LABEL: @different_elt_types_vector_store_load(
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V1:%.*]], ptr [[P:%.*]], i32 4, <4 x i1> [[C:%.*]])
-; CHECK-NEXT: [[V2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> [[C]], <4 x float> poison)
+; CHECK-NEXT: [[V2:%.*]] = bitcast <4 x i32> [[V1]] to <4 x float>
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x float> [[V2]], i32 0
; CHECK-NEXT: ret float [[E2]]
;
>From a4d759c45650fa89bf68ed35006fa53e92f799e4 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 30 Oct 2024 14:06:45 +0000
Subject: [PATCH 3/3] EarlyCSE: address reviews
---
llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 25 +++++++++++--------------
1 file changed, 11 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 9714611cda8b0f..54b29a67fcab2c 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -966,25 +966,22 @@ class EarlyCSE {
const ParseMemoryInst &Later);
Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
- if (!isa<IntrinsicInst, LoadInst, StoreInst>(Inst))
- llvm_unreachable("Instruction not supported");
+ assert((isa<IntrinsicInst, LoadInst, StoreInst>(Inst)) &&
+ "Instruction not supported");
// The load or the store's first operand.
Value *V;
if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
- if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
- switch (II->getIntrinsicID()) {
- case Intrinsic::masked_load:
- V = II;
- break;
- case Intrinsic::masked_store:
- V = II->getOperand(0);
- break;
- default:
- return nullptr;
- }
- else
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ V = II;
+ break;
+ case Intrinsic::masked_store:
+ V = II->getOperand(0);
+ break;
+ default:
return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
+ }
} else {
V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
}
More information about the llvm-commits
mailing list