[llvm] [SelectionDAG] Detect impossible conditions using known bits analysis (PR #150715)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 26 08:06:15 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/150715
>From e7b65b3b1c2271d6e30c5f48af6ded494781553e Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 25 Jul 2025 18:51:24 -0400
Subject: [PATCH 1/5] [SelectionDAG] Detect impossible conditions using known
bits analysis
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 78 +
llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 20 +-
.../CodeGen/AMDGPU/frame-index-elimination.ll | 2104 +++++++++++++++++
3 files changed, 2186 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d3df43473013e..8bbe87bd5ea60 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13529,6 +13529,84 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
SDLoc DL(N);
+ // Detect impossible conditions using known bits analysis.
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+ KnownBits KnownRHS = KnownBits::makeConstant(C1);
+
+ // Bail out early if RHS is unknown (shouldn't happen for constants)
+ if (KnownRHS.isUnknown())
+ return SDValue();
+
+ std::optional<bool> KnownVal;
+
+ // Handle special cases first (like GlobalISel does)
+ if (KnownRHS.isZero()) {
+ // x >=u 0 -> always true
+ // x <u 0 -> always false
+ if (Cond == ISD::SETUGE)
+ KnownVal = true;
+ else if (Cond == ISD::SETULT)
+ KnownVal = false;
+ }
+
+ // If not handled by special cases, use ICmpInst::compare
+ if (!KnownVal) {
+ KnownBits KnownLHS = DAG.computeKnownBits(N0);
+
+ // Convert ISD::CondCode to CmpInst::Predicate
+ CmpInst::Predicate Pred;
+ switch (Cond) {
+ case ISD::SETEQ:
+ Pred = CmpInst::ICMP_EQ;
+ break;
+ case ISD::SETNE:
+ Pred = CmpInst::ICMP_NE;
+ break;
+ case ISD::SETULT:
+ Pred = CmpInst::ICMP_ULT;
+ break;
+ case ISD::SETULE:
+ Pred = CmpInst::ICMP_ULE;
+ break;
+ case ISD::SETUGT:
+ Pred = CmpInst::ICMP_UGT;
+ break;
+ case ISD::SETUGE:
+ Pred = CmpInst::ICMP_UGE;
+ break;
+ case ISD::SETLT:
+ Pred = CmpInst::ICMP_SLT;
+ break;
+ case ISD::SETLE:
+ Pred = CmpInst::ICMP_SLE;
+ break;
+ case ISD::SETGT:
+ Pred = CmpInst::ICMP_SGT;
+ break;
+ case ISD::SETGE:
+ Pred = CmpInst::ICMP_SGE;
+ break;
+ default:
+ return SDValue(); // Unsupported predicate
+ }
+
+ // Use the same logic as GlobalISel: ICmpInst::compare
+ KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
+ }
+
+ // If the comparison result is known, replace with constant
+ if (KnownVal) {
+ if (*KnownVal) {
+ // Use the target's true value for comparisons
+ return DAG.getBoolConstant(true, DL, VT, VT);
+ } else {
+ // False is always 0
+ return DAG.getConstant(0, DL, VT);
+ }
+ }
+ }
+
if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
// If we prefer to have a setcc, and we don't, we'll try our best to
// recreate one using rebuildSetCC.
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 06e957fdcc6a2..9b22abcc94d3b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -597,22 +597,10 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
}
define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; SDISEL-LABEL: select_noccmp1:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp x0, #0
-; SDISEL-NEXT: ccmp x0, #13, #4, lt
-; SDISEL-NEXT: cset w8, gt
-; SDISEL-NEXT: cmp x2, #2
-; SDISEL-NEXT: ccmp x2, #4, #4, lt
-; SDISEL-NEXT: csinc w8, w8, wzr, le
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: csel x0, xzr, x3, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: select_noccmp1:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x0, x3
-; GISEL-NEXT: ret
+; CHECK-LABEL: select_noccmp1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov x0, x3
+; CHECK-NEXT: ret
%c0 = icmp slt i64 %v1, 0
%c1 = icmp sgt i64 %v1, 13
%c2 = icmp slt i64 %v3, 2
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 15cda622b902d..a8e0397594e5d 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -21,6 +21,46 @@
; GCN: ds_write_b32 v0, v0
define void @func_mov_fi_i32() #0 {
+; CI-LABEL: func_mov_fi_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_mov_fi_i32:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_mov_fi_i32:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s32
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_mov_fi_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s32
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_mov_fi_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s32
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
store volatile ptr addrspace(5) %alloca, ptr addrspace(3) poison
ret void
@@ -46,6 +86,61 @@ define void @func_mov_fi_i32() #0 {
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, [[ADD]]
; GFX9-NEXT: ds_write_b32 v0, v0
define void @func_mov_fi_i32_offset() #0 {
+; CI-LABEL: func_mov_fi_i32_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: v_lshr_b32_e64 v0, s32, 6
+; CI-NEXT: v_add_i32_e32 v0, vcc, 4, v0
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_mov_fi_i32_offset:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, v0
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_mov_fi_i32_offset:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s32
+; GFX9-FLATSCR-NEXT: s_add_i32 s0, s32, 4
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_mov_fi_i32_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_add_i32 s0, s32, 4
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, s0
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_mov_fi_i32_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_add_i32 s0, s32, 4
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, s0
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca i32, addrspace(5)
%alloca1 = alloca i32, addrspace(5)
store volatile ptr addrspace(5) %alloca0, ptr addrspace(3) poison
@@ -71,6 +166,48 @@ define void @func_mov_fi_i32_offset() #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_add_constant_to_fi_i32() #0 {
+; CI-LABEL: func_add_constant_to_fi_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT: v_add_i32_e32 v0, vcc, 4, v1
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_add_constant_to_fi_i32:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_add_constant_to_fi_i32:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_u32_e64 v0, 4, s32
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_add_constant_to_fi_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e64 v0, 4, s32
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_add_constant_to_fi_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e64 v0, 4, s32
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [2 x i32], align 4, addrspace(5)
%gep0 = getelementptr inbounds [2 x i32], ptr addrspace(5) %alloca, i32 0, i32 1
store volatile ptr addrspace(5) %gep0, ptr addrspace(3) poison
@@ -93,6 +230,55 @@ define void @func_add_constant_to_fi_i32() #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_i32() #0 {
+; CI-LABEL: func_other_fi_user_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_lshr_b32 s5, s32, 6
+; CI-NEXT: s_mul_i32 s4, s5, 9
+; CI-NEXT: v_mov_b32_e32 v0, s4
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_other_fi_user_i32:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_lshr_b32 s5, s32, 6
+; GFX9-MUBUF-NEXT: s_mul_i32 s4, s5, 9
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_other_fi_user_i32:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_mul_i32 s0, s32, 9
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_other_fi_user_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mul_i32 s0, s32, 9
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_other_fi_user_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mul_i32 s0, s32, 9
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [2 x i32], align 4, addrspace(5)
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
%mul = mul i32 %ptrtoint, 9
@@ -105,6 +291,45 @@ define void @func_other_fi_user_i32() #0 {
; MUBUF: buffer_store_dword v1, v0, s[0:3], 0 offen{{$}}
; GFX9-FLATSCR: scratch_store_dword v0, v1, off{{$}}
define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+; CI-LABEL: func_store_private_arg_i32_ptr:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_mov_b32_e32 v1, 15
+; CI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_store_private_arg_i32_ptr:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 15
+; GFX9-MUBUF-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_store_private_arg_i32_ptr:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 15
+; GFX9-FLATSCR-NEXT: scratch_store_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_store_private_arg_i32_ptr:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 15
+; GFX11-TRUE16-NEXT: scratch_store_b32 v0, v1, off dlc
+; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_store_private_arg_i32_ptr:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 15
+; GFX11-FAKE16-NEXT: scratch_store_b32 v0, v1, off dlc
+; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
store volatile i32 15, ptr addrspace(5) %ptr
ret void
}
@@ -114,6 +339,40 @@ define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc{{$}}
; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc{{$}}
define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+; CI-LABEL: func_load_private_arg_i32_ptr:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_load_private_arg_i32_ptr:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_load_private_arg_i32_ptr:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_load_private_arg_i32_ptr:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: scratch_load_b32 v0, v0, off glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_load_private_arg_i32_ptr:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: scratch_load_b32 v0, v0, off glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%val = load volatile i32, ptr addrspace(5) %ptr
ret void
}
@@ -132,6 +391,48 @@ define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+; CI-LABEL: void_func_byval_struct_i8_i32_ptr:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT: v_or_b32_e32 v0, 4, v1
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT: v_or_b32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32_ptr:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
%load1 = load i32, ptr addrspace(5) %gep1
@@ -146,6 +447,68 @@ define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32
; GFX9-FLATSCR-NEXT: scratch_load_ubyte v0, off, s32
; GFX9-FLATSCR-NEXT: scratch_load_dword v1, off, s32 offset:4
define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+; CI-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], s32
+; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: ds_write_b8 v0, v0
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: ds_write_b32 v0, v1
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: buffer_load_ubyte v0, off, s[0:3], s32
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(1)
+; GFX9-MUBUF-NEXT: ds_write_b8 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v1
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_load_ubyte v0, off, s32
+; GFX9-FLATSCR-NEXT: scratch_load_dword v1, off, s32 offset:4
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1)
+; GFX9-FLATSCR-NEXT: ds_write_b8 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v1
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, s32
+; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s32 offset:4
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT: ds_store_b8 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32_ptr_value:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: scratch_load_u8 v0, off, s32
+; GFX11-FAKE16-NEXT: scratch_load_b32 v1, off, s32 offset:4
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
%load0 = load i8, ptr addrspace(5) %gep0
@@ -173,6 +536,88 @@ define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8
; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
define void @void_func_byval_struct_i8_i32_ptr_nonentry_block(ptr addrspace(5) byval({ i8, i32 }) %arg0, i32 %arg2) #0 {
+; CI-LABEL: void_func_byval_struct_i8_i32_ptr_nonentry_block:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CI-NEXT: s_cbranch_execz .LBB8_2
+; CI-NEXT: ; %bb.1: ; %bb
+; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT: v_add_i32_e64 v0, s[6:7], 4, v1
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: .LBB8_2: ; %ret
+; CI-NEXT: s_or_b64 exec, exec, s[4:5]
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: void_func_byval_struct_i8_i32_ptr_nonentry_block:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-MUBUF-NEXT: s_cbranch_execz .LBB8_2
+; GFX9-MUBUF-NEXT: ; %bb.1: ; %bb
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: .LBB8_2: ; %ret
+; GFX9-MUBUF-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: void_func_byval_struct_i8_i32_ptr_nonentry_block:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB8_2
+; GFX9-FLATSCR-NEXT: ; %bb.1: ; %bb
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:4 glc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_add_u32_e64 v0, 4, s32
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: .LBB8_2: ; %ret
+; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: void_func_byval_struct_i8_i32_ptr_nonentry_block:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-TRUE16-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB8_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %bb
+; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u32_e64 v0, 4, s32
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: .LBB8_2: ; %ret
+; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: void_func_byval_struct_i8_i32_ptr_nonentry_block:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-FAKE16-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB8_2
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %bb
+; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u32_e64 v0, 4, s32
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: .LBB8_2: ; %ret
+; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %arg2, 0
br i1 %cmp, label %bb, label %ret
@@ -202,6 +647,73 @@ ret:
; GCN: ds_write_b32 v0, [[VZ]]
define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
+; CI-LABEL: func_other_fi_user_non_inline_imm_offset_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_lshr_b32 s5, s32, 6
+; CI-NEXT: s_addk_i32 s5, 0x200
+; CI-NEXT: v_mov_b32_e32 v0, 7
+; CI-NEXT: s_mul_i32 s4, s5, 9
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: v_mov_b32_e32 v0, s4
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_other_fi_user_non_inline_imm_offset_i32:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_lshr_b32 s5, s32, 6
+; GFX9-MUBUF-NEXT: s_addk_i32 s5, 0x200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, 7
+; GFX9-MUBUF-NEXT: s_mul_i32 s4, s5, 9
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_other_fi_user_non_inline_imm_offset_i32:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_add_i32 s1, s32, 0x200
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 7
+; GFX9-FLATSCR-NEXT: s_mul_i32 s0, s1, 9
+; GFX9-FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:260
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_other_fi_user_non_inline_imm_offset_i32:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_add_i32 s1, s32, 0x200
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: s_mul_i32 s0, s1, 9
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, s0
+; GFX11-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:260 dlc
+; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_other_fi_user_non_inline_imm_offset_i32:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_add_i32 s1, s32, 0x200
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: s_mul_i32 s0, s1, 9
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, s0
+; GFX11-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:260 dlc
+; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [128 x i32], align 4, addrspace(5)
%alloca1 = alloca [8 x i32], align 4, addrspace(5)
%gep0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca0, i32 0, i32 65
@@ -225,6 +737,103 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
; GCN: ds_write_b32 v0, [[VZ]]
define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 {
+; CI-LABEL: func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_lshr_b32 s5, s32, 6
+; CI-NEXT: s_addk_i32 s5, 0x200
+; CI-NEXT: v_mov_b32_e32 v0, 7
+; CI-NEXT: s_mul_i32 s4, s5, 9
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; def vcc
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: v_mov_b32_e32 v0, s4
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; use vcc
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_lshr_b32 s5, s32, 6
+; GFX9-MUBUF-NEXT: s_addk_i32 s5, 0x200
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, 7
+; GFX9-MUBUF-NEXT: s_mul_i32 s4, s5, 9
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; def vcc
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; use vcc
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_add_i32 s1, s32, 0x200
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 7
+; GFX9-FLATSCR-NEXT: s_mul_i32 s0, s1, 9
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; def vcc
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:260
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; use vcc
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_add_i32 s1, s32, 0x200
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; def vcc
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: s_mul_i32 s0, s1, 9
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, s0
+; GFX11-TRUE16-NEXT: scratch_store_b32 off, v0, s32 offset:260 dlc
+; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; use vcc
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_add_i32 s1, s32, 0x200
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; def vcc
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_mul_i32 s0, s1, 9
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, s0
+; GFX11-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:260 dlc
+; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use vcc
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [128 x i32], align 4, addrspace(5)
%alloca1 = alloca [8 x i32], align 4, addrspace(5)
%vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
@@ -253,6 +862,486 @@ declare void @func(ptr addrspace(5) nocapture) #0
; FLATSCR: scratch_store_dword v0, off, s33 offset:
; FLATSCR: scratch_store_dword v{{[0-9]+}}, off, s33 offset:
define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
+; CI-LABEL: undefined_stack_store_reg:
+; CI: ; %bb.0: ; %bb
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 s16, s33
+; CI-NEXT: s_mov_b32 s33, s32
+; CI-NEXT: s_or_saveexec_b64 s[18:19], -1
+; CI-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; CI-NEXT: s_mov_b64 exec, s[18:19]
+; CI-NEXT: v_writelane_b32 v42, s16, 18
+; CI-NEXT: v_writelane_b32 v42, s30, 0
+; CI-NEXT: v_writelane_b32 v42, s31, 1
+; CI-NEXT: v_writelane_b32 v42, s34, 2
+; CI-NEXT: v_writelane_b32 v42, s35, 3
+; CI-NEXT: v_writelane_b32 v42, s36, 4
+; CI-NEXT: v_writelane_b32 v42, s37, 5
+; CI-NEXT: v_writelane_b32 v42, s38, 6
+; CI-NEXT: v_writelane_b32 v42, s39, 7
+; CI-NEXT: v_writelane_b32 v42, s48, 8
+; CI-NEXT: v_writelane_b32 v42, s49, 9
+; CI-NEXT: v_writelane_b32 v42, s50, 10
+; CI-NEXT: v_writelane_b32 v42, s51, 11
+; CI-NEXT: v_writelane_b32 v42, s52, 12
+; CI-NEXT: v_writelane_b32 v42, s53, 13
+; CI-NEXT: v_writelane_b32 v42, s54, 14
+; CI-NEXT: v_writelane_b32 v42, s55, 15
+; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; CI-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; CI-NEXT: v_writelane_b32 v42, s64, 16
+; CI-NEXT: v_mov_b32_e32 v40, v0
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: s_addk_i32 s32, 0xc00
+; CI-NEXT: v_writelane_b32 v42, s65, 17
+; CI-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
+; CI-NEXT: s_and_saveexec_b64 s[54:55], vcc
+; CI-NEXT: s_cbranch_execz .LBB11_2
+; CI-NEXT: ; %bb.1: ; %bb4
+; CI-NEXT: s_getpc_b64 s[16:17]
+; CI-NEXT: s_add_u32 s16, s16, func@gotpcrel32@lo+4
+; CI-NEXT: s_addc_u32 s17, s17, func@gotpcrel32@hi+12
+; CI-NEXT: s_load_dwordx2 s[64:65], s[16:17], 0x0
+; CI-NEXT: s_mov_b64 s[34:35], s[4:5]
+; CI-NEXT: s_mov_b64 s[36:37], s[6:7]
+; CI-NEXT: s_mov_b64 s[38:39], s[8:9]
+; CI-NEXT: s_mov_b64 s[48:49], s[10:11]
+; CI-NEXT: s_mov_b32 s50, s12
+; CI-NEXT: s_mov_b32 s51, s13
+; CI-NEXT: s_mov_b32 s52, s14
+; CI-NEXT: s_mov_b32 s53, s15
+; CI-NEXT: v_mov_b32_e32 v41, v31
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:28
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:24
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20
+; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16
+; CI-NEXT: v_lshr_b32_e64 v0, s33, 6
+; CI-NEXT: s_mov_b64 s[4:5], s[34:35]
+; CI-NEXT: s_mov_b64 s[6:7], s[36:37]
+; CI-NEXT: s_mov_b64 s[8:9], s[38:39]
+; CI-NEXT: s_mov_b64 s[10:11], s[48:49]
+; CI-NEXT: s_mov_b32 s12, s50
+; CI-NEXT: s_mov_b32 s13, s51
+; CI-NEXT: s_mov_b32 s14, s52
+; CI-NEXT: s_mov_b32 s15, s53
+; CI-NEXT: v_mov_b32_e32 v31, v41
+; CI-NEXT: v_add_i32_e32 v0, vcc, 16, v0
+; CI-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; CI-NEXT: .LBB11_2: ; %bb5
+; CI-NEXT: s_or_b64 exec, exec, s[54:55]
+; CI-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; CI-NEXT: v_readlane_b32 s65, v42, 17
+; CI-NEXT: v_readlane_b32 s64, v42, 16
+; CI-NEXT: v_readlane_b32 s55, v42, 15
+; CI-NEXT: v_readlane_b32 s54, v42, 14
+; CI-NEXT: v_readlane_b32 s53, v42, 13
+; CI-NEXT: v_readlane_b32 s52, v42, 12
+; CI-NEXT: v_readlane_b32 s51, v42, 11
+; CI-NEXT: v_readlane_b32 s50, v42, 10
+; CI-NEXT: v_readlane_b32 s49, v42, 9
+; CI-NEXT: v_readlane_b32 s48, v42, 8
+; CI-NEXT: v_readlane_b32 s39, v42, 7
+; CI-NEXT: v_readlane_b32 s38, v42, 6
+; CI-NEXT: v_readlane_b32 s37, v42, 5
+; CI-NEXT: v_readlane_b32 s36, v42, 4
+; CI-NEXT: v_readlane_b32 s35, v42, 3
+; CI-NEXT: v_readlane_b32 s34, v42, 2
+; CI-NEXT: v_readlane_b32 s31, v42, 1
+; CI-NEXT: v_readlane_b32 s30, v42, 0
+; CI-NEXT: s_mov_b32 s32, s33
+; CI-NEXT: v_readlane_b32 s4, v42, 18
+; CI-NEXT: s_or_saveexec_b64 s[6:7], -1
+; CI-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; CI-NEXT: s_mov_b64 exec, s[6:7]
+; CI-NEXT: s_mov_b32 s33, s4
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: undefined_stack_store_reg:
+; GFX9-MUBUF: ; %bb.0: ; %bb
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_mov_b32 s16, s33
+; GFX9-MUBUF-NEXT: s_mov_b32 s33, s32
+; GFX9-MUBUF-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-MUBUF-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GFX9-MUBUF-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s16, 18
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s30, 0
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s31, 1
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s34, 2
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s35, 3
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s36, 4
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s37, 5
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s38, 6
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s39, 7
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s48, 8
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s49, 9
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s50, 10
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s51, 11
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s52, 12
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s53, 13
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s54, 14
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s55, 15
+; GFX9-MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s64, 16
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v40, v0
+; GFX9-MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-MUBUF-NEXT: s_addk_i32 s32, 0xc00
+; GFX9-MUBUF-NEXT: v_writelane_b32 v42, s65, 17
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
+; GFX9-MUBUF-NEXT: s_and_saveexec_b64 s[54:55], vcc
+; GFX9-MUBUF-NEXT: s_cbranch_execz .LBB11_2
+; GFX9-MUBUF-NEXT: ; %bb.1: ; %bb4
+; GFX9-MUBUF-NEXT: s_getpc_b64 s[16:17]
+; GFX9-MUBUF-NEXT: s_add_u32 s16, s16, func@gotpcrel32@lo+4
+; GFX9-MUBUF-NEXT: s_addc_u32 s17, s17, func@gotpcrel32@hi+12
+; GFX9-MUBUF-NEXT: s_load_dwordx2 s[64:65], s[16:17], 0x0
+; GFX9-MUBUF-NEXT: s_mov_b64 s[34:35], s[4:5]
+; GFX9-MUBUF-NEXT: s_mov_b64 s[36:37], s[6:7]
+; GFX9-MUBUF-NEXT: s_mov_b64 s[38:39], s[8:9]
+; GFX9-MUBUF-NEXT: s_mov_b64 s[48:49], s[10:11]
+; GFX9-MUBUF-NEXT: s_mov_b32 s50, s12
+; GFX9-MUBUF-NEXT: s_mov_b32 s51, s13
+; GFX9-MUBUF-NEXT: s_mov_b32 s52, s14
+; GFX9-MUBUF-NEXT: s_mov_b32 s53, s15
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v41, v31
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:28
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:24
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20
+; GFX9-MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s33
+; GFX9-MUBUF-NEXT: s_mov_b64 s[4:5], s[34:35]
+; GFX9-MUBUF-NEXT: s_mov_b64 s[6:7], s[36:37]
+; GFX9-MUBUF-NEXT: s_mov_b64 s[8:9], s[38:39]
+; GFX9-MUBUF-NEXT: s_mov_b64 s[10:11], s[48:49]
+; GFX9-MUBUF-NEXT: s_mov_b32 s12, s50
+; GFX9-MUBUF-NEXT: s_mov_b32 s13, s51
+; GFX9-MUBUF-NEXT: s_mov_b32 s14, s52
+; GFX9-MUBUF-NEXT: s_mov_b32 s15, s53
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v31, v41
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 16, v0
+; GFX9-MUBUF-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX9-MUBUF-NEXT: .LBB11_2: ; %bb5
+; GFX9-MUBUF-NEXT: s_or_b64 exec, exec, s[54:55]
+; GFX9-MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-MUBUF-NEXT: v_readlane_b32 s65, v42, 17
+; GFX9-MUBUF-NEXT: v_readlane_b32 s64, v42, 16
+; GFX9-MUBUF-NEXT: v_readlane_b32 s55, v42, 15
+; GFX9-MUBUF-NEXT: v_readlane_b32 s54, v42, 14
+; GFX9-MUBUF-NEXT: v_readlane_b32 s53, v42, 13
+; GFX9-MUBUF-NEXT: v_readlane_b32 s52, v42, 12
+; GFX9-MUBUF-NEXT: v_readlane_b32 s51, v42, 11
+; GFX9-MUBUF-NEXT: v_readlane_b32 s50, v42, 10
+; GFX9-MUBUF-NEXT: v_readlane_b32 s49, v42, 9
+; GFX9-MUBUF-NEXT: v_readlane_b32 s48, v42, 8
+; GFX9-MUBUF-NEXT: v_readlane_b32 s39, v42, 7
+; GFX9-MUBUF-NEXT: v_readlane_b32 s38, v42, 6
+; GFX9-MUBUF-NEXT: v_readlane_b32 s37, v42, 5
+; GFX9-MUBUF-NEXT: v_readlane_b32 s36, v42, 4
+; GFX9-MUBUF-NEXT: v_readlane_b32 s35, v42, 3
+; GFX9-MUBUF-NEXT: v_readlane_b32 s34, v42, 2
+; GFX9-MUBUF-NEXT: v_readlane_b32 s31, v42, 1
+; GFX9-MUBUF-NEXT: v_readlane_b32 s30, v42, 0
+; GFX9-MUBUF-NEXT: s_mov_b32 s32, s33
+; GFX9-MUBUF-NEXT: v_readlane_b32 s4, v42, 18
+; GFX9-MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-MUBUF-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GFX9-MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-MUBUF-NEXT: s_mov_b32 s33, s4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: undefined_stack_store_reg:
+; GFX9-FLATSCR: ; %bb.0: ; %bb
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, s33
+; GFX9-FLATSCR-NEXT: s_mov_b32 s33, s32
+; GFX9-FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; GFX9-FLATSCR-NEXT: scratch_store_dword off, v44, s33 offset:32 ; 4-byte Folded Spill
+; GFX9-FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s0, 18
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s30, 0
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s31, 1
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s34, 2
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s35, 3
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s36, 4
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s37, 5
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s38, 6
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s39, 7
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s48, 8
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s49, 9
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s50, 10
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s51, 11
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s52, 12
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s53, 13
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s54, 14
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s55, 15
+; GFX9-FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill
+; GFX9-FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s64, 16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v40, v0
+; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-FLATSCR-NEXT: s_add_i32 s32, s32, 48
+; GFX9-FLATSCR-NEXT: v_writelane_b32 v44, s65, 17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[40:43], s0
+; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[54:55], vcc
+; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB11_2
+; GFX9-FLATSCR-NEXT: ; %bb.1: ; %bb4
+; GFX9-FLATSCR-NEXT: s_getpc_b64 s[0:1]
+; GFX9-FLATSCR-NEXT: s_add_u32 s0, s0, func@gotpcrel32@lo+4
+; GFX9-FLATSCR-NEXT: s_addc_u32 s1, s1, func@gotpcrel32@hi+12
+; GFX9-FLATSCR-NEXT: s_load_dwordx2 s[64:65], s[0:1], 0x0
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[34:35], s[4:5]
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[36:37], s[6:7]
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[38:39], s[8:9]
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[48:49], s[10:11]
+; GFX9-FLATSCR-NEXT: s_mov_b32 s50, s12
+; GFX9-FLATSCR-NEXT: s_mov_b32 s51, s13
+; GFX9-FLATSCR-NEXT: s_mov_b32 s52, s14
+; GFX9-FLATSCR-NEXT: s_mov_b32 s53, s15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v41, v31
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX9-FLATSCR-NEXT: s_add_i32 s0, s33, 16
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[4:5], s[34:35]
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[6:7], s[36:37]
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[8:9], s[38:39]
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[10:11], s[48:49]
+; GFX9-FLATSCR-NEXT: s_mov_b32 s12, s50
+; GFX9-FLATSCR-NEXT: s_mov_b32 s13, s51
+; GFX9-FLATSCR-NEXT: s_mov_b32 s14, s52
+; GFX9-FLATSCR-NEXT: s_mov_b32 s15, s53
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v31, v41
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[40:43], s33 offset:16
+; GFX9-FLATSCR-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX9-FLATSCR-NEXT: .LBB11_2: ; %bb5
+; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[54:55]
+; GFX9-FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
+; GFX9-FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s65, v44, 17
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s64, v44, 16
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s55, v44, 15
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s54, v44, 14
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s53, v44, 13
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s52, v44, 12
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s51, v44, 11
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s50, v44, 10
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s49, v44, 9
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s48, v44, 8
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s39, v44, 7
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s38, v44, 6
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s37, v44, 5
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s36, v44, 4
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s35, v44, 3
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s34, v44, 2
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s31, v44, 1
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s30, v44, 0
+; GFX9-FLATSCR-NEXT: s_mov_b32 s32, s33
+; GFX9-FLATSCR-NEXT: v_readlane_b32 s0, v44, 18
+; GFX9-FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
+; GFX9-FLATSCR-NEXT: scratch_load_dword v44, off, s33 offset:32 ; 4-byte Folded Reload
+; GFX9-FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
+; GFX9-FLATSCR-NEXT: s_mov_b32 s33, s0
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: undefined_stack_store_reg:
+; GFX11-TRUE16: ; %bb.0: ; %bb
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
+; GFX11-TRUE16-NEXT: s_mov_b32 s33, s32
+; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s33 offset:32 ; 4-byte Folded Spill
+; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s0, 17
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4
+; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, v0
+; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 48
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s30, 0
+; GFX11-TRUE16-NEXT: scratch_store_b128 off, v[40:43], s0
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s31, 1
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s34, 2
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s35, 3
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s36, 4
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s37, 5
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s38, 6
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s39, 7
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s48, 8
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s49, 9
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s50, 10
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s51, 11
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s52, 12
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s53, 13
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s54, 14
+; GFX11-TRUE16-NEXT: s_mov_b32 s54, exec_lo
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s64, 15
+; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s65, 16
+; GFX11-TRUE16-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB11_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %bb4
+; GFX11-TRUE16-NEXT: s_getpc_b64 s[0:1]
+; GFX11-TRUE16-NEXT: s_add_u32 s0, s0, func@gotpcrel32@lo+4
+; GFX11-TRUE16-NEXT: s_addc_u32 s1, s1, func@gotpcrel32@hi+12
+; GFX11-TRUE16-NEXT: s_mov_b64 s[34:35], s[4:5]
+; GFX11-TRUE16-NEXT: s_load_b64 s[64:65], s[0:1], 0x0
+; GFX11-TRUE16-NEXT: s_mov_b64 s[36:37], s[6:7]
+; GFX11-TRUE16-NEXT: s_mov_b64 s[38:39], s[8:9]
+; GFX11-TRUE16-NEXT: s_mov_b64 s[48:49], s[10:11]
+; GFX11-TRUE16-NEXT: s_mov_b32 s50, s12
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, v31
+; GFX11-TRUE16-NEXT: s_mov_b32 s51, s13
+; GFX11-TRUE16-NEXT: s_mov_b32 s52, s14
+; GFX11-TRUE16-NEXT: s_mov_b32 s53, s15
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX11-TRUE16-NEXT: s_add_i32 s0, s33, 16
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v41 :: v_dual_mov_b32 v0, s0
+; GFX11-TRUE16-NEXT: s_mov_b64 s[4:5], s[34:35]
+; GFX11-TRUE16-NEXT: s_mov_b64 s[6:7], s[36:37]
+; GFX11-TRUE16-NEXT: s_mov_b64 s[8:9], s[38:39]
+; GFX11-TRUE16-NEXT: s_mov_b64 s[10:11], s[48:49]
+; GFX11-TRUE16-NEXT: s_mov_b32 s12, s50
+; GFX11-TRUE16-NEXT: s_mov_b32 s13, s51
+; GFX11-TRUE16-NEXT: s_mov_b32 s14, s52
+; GFX11-TRUE16-NEXT: s_mov_b32 s15, s53
+; GFX11-TRUE16-NEXT: scratch_store_b128 off, v[40:43], s33 offset:16
+; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX11-TRUE16-NEXT: .LBB11_2: ; %bb5
+; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s54
+; GFX11-TRUE16-NEXT: s_clause 0x1
+; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33
+; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4
+; GFX11-TRUE16-NEXT: v_readlane_b32 s65, v44, 16
+; GFX11-TRUE16-NEXT: v_readlane_b32 s64, v44, 15
+; GFX11-TRUE16-NEXT: v_readlane_b32 s54, v44, 14
+; GFX11-TRUE16-NEXT: v_readlane_b32 s53, v44, 13
+; GFX11-TRUE16-NEXT: v_readlane_b32 s52, v44, 12
+; GFX11-TRUE16-NEXT: v_readlane_b32 s51, v44, 11
+; GFX11-TRUE16-NEXT: v_readlane_b32 s50, v44, 10
+; GFX11-TRUE16-NEXT: v_readlane_b32 s49, v44, 9
+; GFX11-TRUE16-NEXT: v_readlane_b32 s48, v44, 8
+; GFX11-TRUE16-NEXT: v_readlane_b32 s39, v44, 7
+; GFX11-TRUE16-NEXT: v_readlane_b32 s38, v44, 6
+; GFX11-TRUE16-NEXT: v_readlane_b32 s37, v44, 5
+; GFX11-TRUE16-NEXT: v_readlane_b32 s36, v44, 4
+; GFX11-TRUE16-NEXT: v_readlane_b32 s35, v44, 3
+; GFX11-TRUE16-NEXT: v_readlane_b32 s34, v44, 2
+; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v44, 1
+; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v44, 0
+; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33
+; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v44, 17
+; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s33 offset:32 ; 4-byte Folded Reload
+; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: undefined_stack_store_reg:
+; GFX11-FAKE16: ; %bb.0: ; %bb
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
+; GFX11-FAKE16-NEXT: s_mov_b32 s33, s32
+; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s33 offset:32 ; 4-byte Folded Spill
+; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s0, 17
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4
+; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, v0
+; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 48
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s30, 0
+; GFX11-FAKE16-NEXT: scratch_store_b128 off, v[40:43], s0
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s31, 1
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s34, 2
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s35, 3
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s36, 4
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s37, 5
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s38, 6
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s39, 7
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s48, 8
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s49, 9
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s50, 10
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s51, 11
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s52, 12
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s53, 13
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s54, 14
+; GFX11-FAKE16-NEXT: s_mov_b32 s54, exec_lo
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s64, 15
+; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s65, 16
+; GFX11-FAKE16-NEXT: v_cmpx_eq_u32_e32 0, v1
+; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB11_2
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %bb4
+; GFX11-FAKE16-NEXT: s_getpc_b64 s[0:1]
+; GFX11-FAKE16-NEXT: s_add_u32 s0, s0, func@gotpcrel32@lo+4
+; GFX11-FAKE16-NEXT: s_addc_u32 s1, s1, func@gotpcrel32@hi+12
+; GFX11-FAKE16-NEXT: s_mov_b64 s[34:35], s[4:5]
+; GFX11-FAKE16-NEXT: s_load_b64 s[64:65], s[0:1], 0x0
+; GFX11-FAKE16-NEXT: s_mov_b64 s[36:37], s[6:7]
+; GFX11-FAKE16-NEXT: s_mov_b64 s[38:39], s[8:9]
+; GFX11-FAKE16-NEXT: s_mov_b64 s[48:49], s[10:11]
+; GFX11-FAKE16-NEXT: s_mov_b32 s50, s12
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, v31
+; GFX11-FAKE16-NEXT: s_mov_b32 s51, s13
+; GFX11-FAKE16-NEXT: s_mov_b32 s52, s14
+; GFX11-FAKE16-NEXT: s_mov_b32 s53, s15
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX11-FAKE16-NEXT: s_add_i32 s0, s33, 16
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v31, v41 :: v_dual_mov_b32 v0, s0
+; GFX11-FAKE16-NEXT: s_mov_b64 s[4:5], s[34:35]
+; GFX11-FAKE16-NEXT: s_mov_b64 s[6:7], s[36:37]
+; GFX11-FAKE16-NEXT: s_mov_b64 s[8:9], s[38:39]
+; GFX11-FAKE16-NEXT: s_mov_b64 s[10:11], s[48:49]
+; GFX11-FAKE16-NEXT: s_mov_b32 s12, s50
+; GFX11-FAKE16-NEXT: s_mov_b32 s13, s51
+; GFX11-FAKE16-NEXT: s_mov_b32 s14, s52
+; GFX11-FAKE16-NEXT: s_mov_b32 s15, s53
+; GFX11-FAKE16-NEXT: scratch_store_b128 off, v[40:43], s33 offset:16
+; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[64:65]
+; GFX11-FAKE16-NEXT: .LBB11_2: ; %bb5
+; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s54
+; GFX11-FAKE16-NEXT: s_clause 0x1
+; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s33
+; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:4
+; GFX11-FAKE16-NEXT: v_readlane_b32 s65, v44, 16
+; GFX11-FAKE16-NEXT: v_readlane_b32 s64, v44, 15
+; GFX11-FAKE16-NEXT: v_readlane_b32 s54, v44, 14
+; GFX11-FAKE16-NEXT: v_readlane_b32 s53, v44, 13
+; GFX11-FAKE16-NEXT: v_readlane_b32 s52, v44, 12
+; GFX11-FAKE16-NEXT: v_readlane_b32 s51, v44, 11
+; GFX11-FAKE16-NEXT: v_readlane_b32 s50, v44, 10
+; GFX11-FAKE16-NEXT: v_readlane_b32 s49, v44, 9
+; GFX11-FAKE16-NEXT: v_readlane_b32 s48, v44, 8
+; GFX11-FAKE16-NEXT: v_readlane_b32 s39, v44, 7
+; GFX11-FAKE16-NEXT: v_readlane_b32 s38, v44, 6
+; GFX11-FAKE16-NEXT: v_readlane_b32 s37, v44, 5
+; GFX11-FAKE16-NEXT: v_readlane_b32 s36, v44, 4
+; GFX11-FAKE16-NEXT: v_readlane_b32 s35, v44, 3
+; GFX11-FAKE16-NEXT: v_readlane_b32 s34, v44, 2
+; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v44, 1
+; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v44, 0
+; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33
+; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v44, 17
+; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1
+; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s33 offset:32 ; 4-byte Folded Reload
+; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1
+; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
bb:
%tmp = alloca <4 x float>, align 16, addrspace(5)
%tmp2 = insertelement <4 x float> poison, float %arg, i32 0
@@ -285,6 +1374,88 @@ bb5:
; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]]
define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
+; CI-LABEL: alloca_ptr_nonentry_block:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CI-NEXT: s_cbranch_execz .LBB12_2
+; CI-NEXT: ; %bb.1: ; %bb
+; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: v_lshr_b32_e64 v1, s32, 6
+; CI-NEXT: v_or_b32_e32 v0, 4, v1
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v0
+; CI-NEXT: .LBB12_2: ; %ret
+; CI-NEXT: s_or_b64 exec, exec, s[4:5]
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: alloca_ptr_nonentry_block:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-MUBUF-NEXT: s_cbranch_execz .LBB12_2
+; GFX9-MUBUF-NEXT: ; %bb.1: ; %bb
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX9-MUBUF-NEXT: v_or_b32_e32 v0, 4, v1
+; GFX9-MUBUF-NEXT: ds_write_b32 v0, v0
+; GFX9-MUBUF-NEXT: .LBB12_2: ; %ret
+; GFX9-MUBUF-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: alloca_ptr_nonentry_block:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB12_2
+; GFX9-FLATSCR-NEXT: ; %bb.1: ; %bb
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:4 glc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX9-FLATSCR-NEXT: ds_write_b32 v0, v0
+; GFX9-FLATSCR-NEXT: .LBB12_2: ; %ret
+; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: alloca_ptr_nonentry_block:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-TRUE16-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB12_2
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %bb
+; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX11-TRUE16-NEXT: ds_store_b32 v0, v0
+; GFX11-TRUE16-NEXT: .LBB12_2: ; %ret
+; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: alloca_ptr_nonentry_block:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, exec_lo
+; GFX11-FAKE16-NEXT: v_cmpx_eq_u32_e32 0, v0
+; GFX11-FAKE16-NEXT: s_cbranch_execz .LBB12_2
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %bb
+; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e64 v0, s32, 4
+; GFX11-FAKE16-NEXT: ds_store_b32 v0, v0
+; GFX11-FAKE16-NEXT: .LBB12_2: ; %ret
+; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca { i8, i32 }, align 8, addrspace(5)
%cmp = icmp eq i32 %arg0, 0
br i1 %cmp, label %bb, label %ret
@@ -319,6 +1490,79 @@ ret:
; GFX11-FAKE16-DAG: ds_store_b16 v{{[0-9]+}}, [[C]] offset:8
; GFX11-FAKE16-NEXT: s_endpgm
define protected amdgpu_kernel void @tied_operand_test(i1 %c1, i1 %c2, i32 %val) {
+; CI-LABEL: tied_operand_test:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
+; CI-NEXT: s_load_dword s4, s[8:9], 0x1
+; CI-NEXT: v_mov_b32_e32 v1, 0x7b
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_lshl_b32 s4, s4, 1
+; CI-NEXT: v_mov_b32_e32 v2, s4
+; CI-NEXT: ds_write_b16 v2, v1 offset:8
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: ds_write_b16 v2, v0 offset:10
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: tied_operand_test:
+; GFX9-MUBUF: ; %bb.0: ; %entry
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: buffer_load_ushort v0, off, s[0:3], 0
+; GFX9-MUBUF-NEXT: s_load_dword s4, s[8:9], 0x4
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_lshl_b32 s4, s4, 1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-MUBUF-NEXT: ds_write_b16 v2, v1 offset:8
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: ds_write_b16 v2, v0 offset:10
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: tied_operand_test:
+; GFX9-FLATSCR: ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: scratch_load_ushort v0, off, s0
+; GFX9-FLATSCR-NEXT: s_load_dword s0, s[4:5], 0x4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_lshl_b32 s0, s0, 1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, s0
+; GFX9-FLATSCR-NEXT: ds_write_b16 v2, v1 offset:8
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: ds_write_b16 v2, v0 offset:10
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: tied_operand_test:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, off
+; GFX11-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x7b
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX11-TRUE16-NEXT: ds_store_b16_d16_hi v1, v0 offset:8
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: ds_store_b16 v1, v0 offset:10
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: tied_operand_test:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, off
+; GFX11-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x4
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_lshl_b32 s0, s0, 1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 0x7b :: v_dual_mov_b32 v2, s0
+; GFX11-FAKE16-NEXT: ds_store_b16 v2, v1 offset:8
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: ds_store_b16 v2, v0 offset:10
+; GFX11-FAKE16-NEXT: s_endpgm
entry:
%scratch0 = alloca i16, align 4, addrspace(5)
%scratch1 = alloca i16, align 4, addrspace(5)
@@ -345,6 +1589,115 @@ entry:
; GFX9-MUBUF-NEXT: v_add_u32_e32 [[SCALED_FP]], 0x3000, [[SCALED_FP]]
; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 64, [[SCALED_FP]]
define void @fi_vop3_literal_error() {
+; CI-LABEL: fi_vop3_literal_error:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 s4, s33
+; CI-NEXT: s_add_i32 s33, s32, 0x7ffc0
+; CI-NEXT: s_and_b32 s33, s33, 0xfff80000
+; CI-NEXT: v_lshr_b32_e64 v1, s33, 6
+; CI-NEXT: s_movk_i32 vcc_lo, 0x3000
+; CI-NEXT: v_add_i32_e32 v1, vcc, vcc_lo, v1
+; CI-NEXT: v_add_i32_e32 v0, vcc, 64, v1
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: v_mov_b32_e32 v2, 0x2000
+; CI-NEXT: buffer_store_dword v1, v2, s[0:3], s33 offen
+; CI-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:4 glc
+; CI-NEXT: s_waitcnt vmcnt(0)
+; CI-NEXT: s_mov_b32 s5, s34
+; CI-NEXT: s_mov_b32 s34, s32
+; CI-NEXT: s_add_i32 s32, s32, 0x200000
+; CI-NEXT: s_mov_b32 s32, s34
+; CI-NEXT: s_mov_b32 s34, s5
+; CI-NEXT: s_mov_b32 s33, s4
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-MUBUF-LABEL: fi_vop3_literal_error:
+; GFX9-MUBUF: ; %bb.0: ; %entry
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_mov_b32 s4, s33
+; GFX9-MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
+; GFX9-MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
+; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v1, 6, s33
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x3000, v1
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 64, v1
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0x2000
+; GFX9-MUBUF-NEXT: buffer_store_dword v1, v2, s[0:3], s33 offen
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen glc
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: s_mov_b32 s5, s34
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen offset:4 glc
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
+; GFX9-MUBUF-NEXT: s_mov_b32 s34, s32
+; GFX9-MUBUF-NEXT: s_add_i32 s32, s32, 0x200000
+; GFX9-MUBUF-NEXT: ; kill: killed $vgpr0
+; GFX9-MUBUF-NEXT: s_mov_b32 s32, s34
+; GFX9-MUBUF-NEXT: s_mov_b32 s34, s5
+; GFX9-MUBUF-NEXT: s_mov_b32 s33, s4
+; GFX9-MUBUF-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-FLATSCR-LABEL: fi_vop3_literal_error:
+; GFX9-FLATSCR: ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, s33
+; GFX9-FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
+; GFX9-FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
+; GFX9-FLATSCR-NEXT: s_mov_b32 s1, s34
+; GFX9-FLATSCR-NEXT: s_mov_b32 s34, s32
+; GFX9-FLATSCR-NEXT: s_add_i32 s32, s32, 0x8000
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLATSCR-NEXT: s_add_i32 s2, s33, 0x2000
+; GFX9-FLATSCR-NEXT: scratch_store_dword off, v0, s2
+; GFX9-FLATSCR-NEXT: s_add_i32 s2, s33, 0x3000
+; GFX9-FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:64 glc
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: s_mov_b32 s32, s34
+; GFX9-FLATSCR-NEXT: s_mov_b32 s34, s1
+; GFX9-FLATSCR-NEXT: s_mov_b32 s33, s0
+; GFX9-FLATSCR-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: fi_vop3_literal_error:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, s33
+; GFX11-TRUE16-NEXT: s_add_i32 s33, s32, 0x1fff
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-TRUE16-NEXT: s_and_b32 s33, s33, 0xffffe000
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, s34
+; GFX11-TRUE16-NEXT: s_mov_b32 s34, s32
+; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 0x8000
+; GFX11-TRUE16-NEXT: s_add_i32 s2, s33, 0x2000
+; GFX11-TRUE16-NEXT: s_mov_b32 s32, s34
+; GFX11-TRUE16-NEXT: scratch_store_b32 off, v0, s2
+; GFX11-TRUE16-NEXT: s_add_i32 s2, s33, 0x3000
+; GFX11-TRUE16-NEXT: s_mov_b32 s34, s1
+; GFX11-TRUE16-NEXT: scratch_load_b64 v[0:1], off, s2 offset:64 glc dlc
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: s_mov_b32 s33, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: fi_vop3_literal_error:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, s33
+; GFX11-FAKE16-NEXT: s_add_i32 s33, s32, 0x1fff
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT: s_and_b32 s33, s33, 0xffffe000
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, s34
+; GFX11-FAKE16-NEXT: s_mov_b32 s34, s32
+; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 0x8000
+; GFX11-FAKE16-NEXT: s_add_i32 s2, s33, 0x2000
+; GFX11-FAKE16-NEXT: s_mov_b32 s32, s34
+; GFX11-FAKE16-NEXT: scratch_store_b32 off, v0, s2
+; GFX11-FAKE16-NEXT: s_add_i32 s2, s33, 0x3000
+; GFX11-FAKE16-NEXT: s_mov_b32 s34, s1
+; GFX11-FAKE16-NEXT: scratch_load_b64 v[0:1], off, s2 offset:64 glc dlc
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: s_mov_b32 s33, s0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%pin.low = alloca i32, align 8192, addrspace(5)
%local.area = alloca [1060 x i64], align 4096, addrspace(5)
@@ -363,6 +1716,132 @@ entry:
; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010
; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0
define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 {
+; CI-LABEL: fi_sop2_s_add_u32_literal_error:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_load_dword s5, s[8:9], 0x30
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: s_add_u32 s4, 0, 0x2010
+; CI-NEXT: v_mov_b32_e32 v0, 0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_addc_u32 s5, s5, 0
+; CI-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], 2
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; CI-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
+; CI-NEXT: .LBB15_1: ; %.shuffle.then.i.i.i.i
+; CI-NEXT: ; =>This Inner Loop Header: Depth=1
+; CI-NEXT: s_and_b64 vcc, exec, s[4:5]
+; CI-NEXT: s_cbranch_vccnz .LBB15_1
+; CI-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
+; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: fi_sop2_s_add_u32_literal_error:
+; GFX9-MUBUF: ; %bb.0: ; %entry
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: s_mov_b64 s[4:5], src_private_base
+; GFX9-MUBUF-NEXT: s_add_u32 s4, 0, 0x2010
+; GFX9-MUBUF-NEXT: s_addc_u32 s5, s5, 0
+; GFX9-MUBUF-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], 2
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX9-MUBUF-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9-MUBUF-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
+; GFX9-MUBUF-NEXT: .LBB15_1: ; %.shuffle.then.i.i.i.i
+; GFX9-MUBUF-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-MUBUF-NEXT: s_and_b64 vcc, exec, s[4:5]
+; GFX9-MUBUF-NEXT: s_cbranch_vccnz .LBB15_1
+; GFX9-MUBUF-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(1)
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(1)
+; GFX9-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: fi_sop2_s_add_u32_literal_error:
+; GFX9-FLATSCR: ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX9-FLATSCR-NEXT: s_add_u32 s0, 0, 0x2010
+; GFX9-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLATSCR-NEXT: v_cmp_lt_u64_e64 s[0:1], s[0:1], 2
+; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2
+; GFX9-FLATSCR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX9-FLATSCR-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
+; GFX9-FLATSCR-NEXT: .LBB15_1: ; %.shuffle.then.i.i.i.i
+; GFX9-FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-FLATSCR-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX9-FLATSCR-NEXT: s_cbranch_vccnz .LBB15_1
+; GFX9-FLATSCR-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: s_nop 1
+; GFX9-FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:4
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: fi_sop2_s_add_u32_literal_error:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX11-TRUE16-NEXT: s_add_u32 s0, 0, 0x2010
+; GFX11-TRUE16-NEXT: s_addc_u32 s1, s1, 0
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-TRUE16-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], 2
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s1
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 1, v2
+; GFX11-TRUE16-NEXT: .LBB15_1: ; %.shuffle.then.i.i.i.i
+; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB15_1
+; GFX11-TRUE16-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX11-TRUE16-NEXT: scratch_load_b64 v[0:1], off, off offset:4
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, 0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fi_sop2_s_add_u32_literal_error:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX11-FAKE16-NEXT: s_add_u32 s0, 0, 0x2010
+; GFX11-FAKE16-NEXT: s_addc_u32 s1, s1, 0
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], 2
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s1
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e64 s0, 1, v2
+; GFX11-FAKE16-NEXT: .LBB15_1: ; %.shuffle.then.i.i.i.i
+; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB15_1
+; GFX11-FAKE16-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX11-FAKE16-NEXT: scratch_load_b64 v[0:1], off, off offset:4
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, 0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s0
+; GFX11-FAKE16-NEXT: s_endpgm
entry:
%.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
%Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
@@ -385,6 +1864,116 @@ vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i
; GCN-LABEL: {{^}}fi_sop2_and_literal_error:
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1fe00
define amdgpu_kernel void @fi_sop2_and_literal_error() #0 {
+; CI-LABEL: fi_sop2_and_literal_error:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v0, 0
+; CI-NEXT: s_mov_b64 s[4:5], -1
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; CI-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
+; CI-NEXT: .LBB16_1: ; %.shuffle.then.i.i.i.i
+; CI-NEXT: ; =>This Inner Loop Header: Depth=1
+; CI-NEXT: s_and_b64 vcc, exec, s[4:5]
+; CI-NEXT: s_cbranch_vccnz .LBB16_1
+; CI-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
+; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: fi_sop2_and_literal_error:
+; GFX9-MUBUF: ; %bb.0: ; %entry
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-MUBUF-NEXT: s_mov_b64 s[4:5], -1
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX9-MUBUF-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9-MUBUF-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
+; GFX9-MUBUF-NEXT: .LBB16_1: ; %.shuffle.then.i.i.i.i
+; GFX9-MUBUF-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-MUBUF-NEXT: s_and_b64 vcc, exec, s[4:5]
+; GFX9-MUBUF-NEXT: s_cbranch_vccnz .LBB16_1
+; GFX9-MUBUF-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(1)
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(1)
+; GFX9-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: fi_sop2_and_literal_error:
+; GFX9-FLATSCR: ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[0:1], -1
+; GFX9-FLATSCR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX9-FLATSCR-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
+; GFX9-FLATSCR-NEXT: .LBB16_1: ; %.shuffle.then.i.i.i.i
+; GFX9-FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-FLATSCR-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX9-FLATSCR-NEXT: s_cbranch_vccnz .LBB16_1
+; GFX9-FLATSCR-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: s_nop 1
+; GFX9-FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:4
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: fi_sop2_and_literal_error:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, -1
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 1, v2
+; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s1
+; GFX11-TRUE16-NEXT: .LBB16_1: ; %.shuffle.then.i.i.i.i
+; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-TRUE16-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB16_1
+; GFX11-TRUE16-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX11-TRUE16-NEXT: scratch_load_b64 v[0:1], off, off offset:4
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, 0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fi_sop2_and_literal_error:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, -1
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e64 s0, 1, v2
+; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s1
+; GFX11-FAKE16-NEXT: .LBB16_1: ; %.shuffle.then.i.i.i.i
+; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB16_1
+; GFX11-FAKE16-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX11-FAKE16-NEXT: scratch_load_b64 v[0:1], off, off offset:4
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, 0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s0
+; GFX11-FAKE16-NEXT: s_endpgm
entry:
%.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
%Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
@@ -406,6 +1995,116 @@ vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i
; GCN-LABEL: {{^}}fi_sop2_or_literal_error:
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3039
define amdgpu_kernel void @fi_sop2_or_literal_error() #0 {
+; CI-LABEL: fi_sop2_or_literal_error:
+; CI: ; %bb.0: ; %entry
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_mov_b32_e32 v0, 0
+; CI-NEXT: s_mov_b64 s[4:5], -1
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; CI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; CI-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
+; CI-NEXT: .LBB17_1: ; %.shuffle.then.i.i.i.i
+; CI-NEXT: ; =>This Inner Loop Header: Depth=1
+; CI-NEXT: s_and_b64 vcc, exec, s[4:5]
+; CI-NEXT: s_cbranch_vccnz .LBB17_1
+; CI-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
+; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; CI-NEXT: s_waitcnt vmcnt(1)
+; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: fi_sop2_or_literal_error:
+; GFX9-MUBUF: ; %bb.0: ; %entry
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-MUBUF-NEXT: s_mov_b64 s[4:5], -1
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX9-MUBUF-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GFX9-MUBUF-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
+; GFX9-MUBUF-NEXT: .LBB17_1: ; %.shuffle.then.i.i.i.i
+; GFX9-MUBUF-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-MUBUF-NEXT: s_and_b64 vcc, exec, s[4:5]
+; GFX9-MUBUF-NEXT: s_cbranch_vccnz .LBB17_1
+; GFX9-MUBUF-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX9-MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8
+; GFX9-MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(1)
+; GFX9-MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
+; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(1)
+; GFX9-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: fi_sop2_or_literal_error:
+; GFX9-FLATSCR: ; %bb.0: ; %entry
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0
+; GFX9-FLATSCR-NEXT: s_mov_b64 s[0:1], -1
+; GFX9-FLATSCR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX9-FLATSCR-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
+; GFX9-FLATSCR-NEXT: .LBB17_1: ; %.shuffle.then.i.i.i.i
+; GFX9-FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-FLATSCR-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX9-FLATSCR-NEXT: s_cbranch_vccnz .LBB17_1
+; GFX9-FLATSCR-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
+; GFX9-FLATSCR-NEXT: s_nop 1
+; GFX9-FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:4
+; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: fi_sop2_or_literal_error:
+; GFX11-TRUE16: ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, -1
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 1, v2
+; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s1
+; GFX11-TRUE16-NEXT: .LBB17_1: ; %.shuffle.then.i.i.i.i
+; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-TRUE16-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB17_1
+; GFX11-TRUE16-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX11-TRUE16-NEXT: scratch_load_b64 v[0:1], off, off offset:4
+; GFX11-TRUE16-NEXT: s_mov_b32 s0, 0
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT: scratch_store_b64 off, v[0:1], s0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: fi_sop2_or_literal_error:
+; GFX11-FAKE16: ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, -1
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v0
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e64 s0, 1, v2
+; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s1
+; GFX11-FAKE16-NEXT: .LBB17_1: ; %.shuffle.then.i.i.i.i
+; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB17_1
+; GFX11-FAKE16-NEXT: ; %bb.2: ; %vector.body.i.i.i.i
+; GFX11-FAKE16-NEXT: scratch_load_b64 v[0:1], off, off offset:4
+; GFX11-FAKE16-NEXT: s_mov_b32 s0, 0
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT: scratch_store_b64 off, v[0:1], s0
+; GFX11-FAKE16-NEXT: s_endpgm
entry:
%.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
%Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
@@ -435,6 +2134,76 @@ vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i
; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @s_multiple_frame_indexes_literal_offsets(i32 inreg %arg0) #0 {
+; CI-LABEL: s_multiple_frame_indexes_literal_offsets:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dword s4, s[8:9], 0x0
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: s_movk_i32 s5, 0x44
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s4, 0
+; CI-NEXT: s_cselect_b32 s4, s5, 0x48
+; CI-NEXT: s_mov_b32 s5, 0
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; use s4, s5
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: s_multiple_frame_indexes_literal_offsets:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_load_dword s4, s[8:9], 0x0
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: s_movk_i32 s5, 0x44
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_cmp_eq_u32 s4, 0
+; GFX9-MUBUF-NEXT: s_cselect_b32 s4, s5, 0x48
+; GFX9-MUBUF-NEXT: s_mov_b32 s5, 0
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; use s4, s5
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: s_multiple_frame_indexes_literal_offsets:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_load_dword s0, s[4:5], 0x0
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: s_movk_i32 s1, 0x44
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_cmp_eq_u32 s0, 0
+; GFX9-FLATSCR-NEXT: s_cselect_b32 s0, s1, 0x48
+; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; use s0, s1
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: s_multiple_frame_indexes_literal_offsets:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX11-TRUE16-NEXT: s_movk_i32 s1, 0x44
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-TRUE16-NEXT: s_cselect_b32 s0, s1, 0x48
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; use s0, s1
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: s_multiple_frame_indexes_literal_offsets:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX11-FAKE16-NEXT: s_movk_i32 s1, 0x44
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-FAKE16-NEXT: s_cselect_b32 s0, s1, 0x48
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use s0, s1
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_endpgm
%alloca0 = alloca [17 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
@@ -455,6 +2224,76 @@ define amdgpu_kernel void @s_multiple_frame_indexes_literal_offsets(i32 inreg %a
; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @s_multiple_frame_indexes_one_imm_one_literal_offset(i32 inreg %arg0) #0 {
+; CI-LABEL: s_multiple_frame_indexes_one_imm_one_literal_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dword s4, s[8:9], 0x0
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: s_mov_b32 s5, 64
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s4, 0
+; CI-NEXT: s_cselect_b32 s4, s5, 0x44
+; CI-NEXT: s_mov_b32 s5, 0
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; use s4, s5
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: s_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_load_dword s4, s[8:9], 0x0
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: s_mov_b32 s5, 64
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_cmp_eq_u32 s4, 0
+; GFX9-MUBUF-NEXT: s_cselect_b32 s4, s5, 0x44
+; GFX9-MUBUF-NEXT: s_mov_b32 s5, 0
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; use s4, s5
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: s_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_load_dword s0, s[4:5], 0x0
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 64
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_cmp_eq_u32 s0, 0
+; GFX9-FLATSCR-NEXT: s_cselect_b32 s0, s1, 0x44
+; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; use s0, s1
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: s_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 64
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-TRUE16-NEXT: s_cselect_b32 s0, s1, 0x44
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; use s0, s1
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: s_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 64
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-FAKE16-NEXT: s_cselect_b32 s0, s1, 0x44
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use s0, s1
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_endpgm
%alloca0 = alloca [16 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
@@ -472,6 +2311,76 @@ define amdgpu_kernel void @s_multiple_frame_indexes_one_imm_one_literal_offset(i
; GCN: s_mov_b32 [[ALLOCA0:s[0-9]+]], 0
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @s_multiple_frame_indexes_imm_offsets(i32 inreg %arg0) #0 {
+; CI-LABEL: s_multiple_frame_indexes_imm_offsets:
+; CI: ; %bb.0:
+; CI-NEXT: s_load_dword s4, s[8:9], 0x0
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: s_mov_b32 s5, 16
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_cmp_eq_u32 s4, 0
+; CI-NEXT: s_cselect_b32 s4, s5, 20
+; CI-NEXT: s_mov_b32 s5, 0
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; use s4, s5
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: s_multiple_frame_indexes_imm_offsets:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_load_dword s4, s[8:9], 0x0
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: s_mov_b32 s5, 16
+; GFX9-MUBUF-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-MUBUF-NEXT: s_cmp_eq_u32 s4, 0
+; GFX9-MUBUF-NEXT: s_cselect_b32 s4, s5, 20
+; GFX9-MUBUF-NEXT: s_mov_b32 s5, 0
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; use s4, s5
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: s_multiple_frame_indexes_imm_offsets:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_load_dword s0, s[4:5], 0x0
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 16
+; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLATSCR-NEXT: s_cmp_eq_u32 s0, 0
+; GFX9-FLATSCR-NEXT: s_cselect_b32 s0, s1, 20
+; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; use s0, s1
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: s_multiple_frame_indexes_imm_offsets:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 16
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-TRUE16-NEXT: s_cselect_b32 s0, s1, 20
+; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; use s0, s1
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: s_multiple_frame_indexes_imm_offsets:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_load_b32 s0, s[4:5], 0x0
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 16
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_cmp_eq_u32 s0, 0
+; GFX11-FAKE16-NEXT: s_cselect_b32 s0, s1, 20
+; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use s0, s1
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_endpgm
%alloca0 = alloca [4 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
%alloca2 = alloca i32, align 4, addrspace(5)
@@ -489,6 +2398,71 @@ define amdgpu_kernel void @s_multiple_frame_indexes_imm_offsets(i32 inreg %arg0)
; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @v_multiple_frame_indexes_literal_offsets() #0 {
+; CI-LABEL: v_multiple_frame_indexes_literal_offsets:
+; CI: ; %bb.0:
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: v_mov_b32_e32 v1, 0x48
+; CI-NEXT: v_mov_b32_e32 v2, 0x44
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; use v0, v1
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: v_multiple_frame_indexes_literal_offsets:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 0x48
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0x44
+; GFX9-MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; use v0, v1
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: v_multiple_frame_indexes_literal_offsets:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x48
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0x44
+; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; use v0, v1
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: v_multiple_frame_indexes_literal_offsets:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0x44
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, 0x48, v1 :: v_dual_mov_b32 v1, 0
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; use v0, v1
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: v_multiple_frame_indexes_literal_offsets:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0x44
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v0, 0x48, v1 :: v_dual_mov_b32 v1, 0
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v0, v1
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_endpgm
%vgpr = call i32 @llvm.amdgcn.workitem.id.x()
%alloca0 = alloca [17 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
@@ -507,6 +2481,69 @@ define amdgpu_kernel void @v_multiple_frame_indexes_literal_offsets() #0 {
; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @v_multiple_frame_indexes_one_imm_one_literal_offset() #0 {
+; CI-LABEL: v_multiple_frame_indexes_one_imm_one_literal_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: v_mov_b32_e32 v1, 0x44
+; CI-NEXT: v_mov_b32_e32 v2, 64
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; use v0, v1
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: v_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 0x44
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 64
+; GFX9-MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; use v0, v1
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: v_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x44
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 64
+; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; use v0, v1
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: v_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 64 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, 0x44, v1 :: v_dual_mov_b32 v1, 0
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; use v0, v1
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: v_multiple_frame_indexes_one_imm_one_literal_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 64 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v0, 0x44, v1 :: v_dual_mov_b32 v1, 0
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v0, v1
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_endpgm
%vgpr = call i32 @llvm.amdgcn.workitem.id.x()
%alloca0 = alloca [16 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
@@ -525,6 +2562,69 @@ define amdgpu_kernel void @v_multiple_frame_indexes_one_imm_one_literal_offset()
; GCN: v_mov_b32_e32 [[ALLOCA0:v[0-9]+]], 0{{$}}
; GCN: ; use [[SELECT]], [[ALLOCA0]]
define amdgpu_kernel void @v_multiple_frame_indexes_imm_offsets() #0 {
+; CI-LABEL: v_multiple_frame_indexes_imm_offsets:
+; CI: ; %bb.0:
+; CI-NEXT: s_add_u32 s0, s0, s17
+; CI-NEXT: v_mov_b32_e32 v1, 12
+; CI-NEXT: v_mov_b32_e32 v2, 8
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: s_addc_u32 s1, s1, 0
+; CI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; CI-NEXT: v_mov_b32_e32 v1, 0
+; CI-NEXT: ;;#ASMSTART
+; CI-NEXT: ; use v0, v1
+; CI-NEXT: ;;#ASMEND
+; CI-NEXT: s_endpgm
+;
+; GFX9-MUBUF-LABEL: v_multiple_frame_indexes_imm_offsets:
+; GFX9-MUBUF: ; %bb.0:
+; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s17
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 12
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 8
+; GFX9-MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0
+; GFX9-MUBUF-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-MUBUF-NEXT: ;;#ASMSTART
+; GFX9-MUBUF-NEXT: ; use v0, v1
+; GFX9-MUBUF-NEXT: ;;#ASMEND
+; GFX9-MUBUF-NEXT: s_endpgm
+;
+; GFX9-FLATSCR-LABEL: v_multiple_frame_indexes_imm_offsets:
+; GFX9-FLATSCR: ; %bb.0:
+; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 8
+; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
+; GFX9-FLATSCR-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-FLATSCR-NEXT: ;;#ASMSTART
+; GFX9-FLATSCR-NEXT: ; use v0, v1
+; GFX9-FLATSCR-NEXT: ;;#ASMEND
+; GFX9-FLATSCR-NEXT: s_endpgm
+;
+; GFX11-TRUE16-LABEL: v_multiple_frame_indexes_imm_offsets:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 8 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, 12, v1 :: v_dual_mov_b32 v1, 0
+; GFX11-TRUE16-NEXT: ;;#ASMSTART
+; GFX11-TRUE16-NEXT: ; use v0, v1
+; GFX11-TRUE16-NEXT: ;;#ASMEND
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: v_multiple_frame_indexes_imm_offsets:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 8 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v0, 12, v1 :: v_dual_mov_b32 v1, 0
+; GFX11-FAKE16-NEXT: ;;#ASMSTART
+; GFX11-FAKE16-NEXT: ; use v0, v1
+; GFX11-FAKE16-NEXT: ;;#ASMEND
+; GFX11-FAKE16-NEXT: s_endpgm
%vgpr = call i32 @llvm.amdgcn.workitem.id.x()
%alloca0 = alloca [2 x i32], align 8, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
@@ -536,3 +2636,7 @@ define amdgpu_kernel void @v_multiple_frame_indexes_imm_offsets() #0 {
}
attributes #0 = { nounwind }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GFX9: {{.*}}
+; MUBUF: {{.*}}
>From ef7f64ac72fa4fc365111650dd5f46a2c2b83919 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 25 Jul 2025 19:12:20 -0400
Subject: [PATCH 2/5] Replace with *KnownVal
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8bbe87bd5ea60..01c2e80ac3541 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13597,13 +13597,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// If the comparison result is known, replace with constant
if (KnownVal) {
- if (*KnownVal) {
- // Use the target's true value for comparisons
- return DAG.getBoolConstant(true, DL, VT, VT);
- } else {
- // False is always 0
- return DAG.getConstant(0, DL, VT);
- }
+ return DAG.getBoolConstant(*KnownVal, DL, VT, VT);
}
}
>From 601c7adf0b93623453651618ff522963777e8f8a Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 25 Jul 2025 19:13:27 -0400
Subject: [PATCH 3/5] Value type
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 01c2e80ac3541..526bdb3e7ee8e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13597,7 +13597,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// If the comparison result is known, replace with constant
if (KnownVal) {
- return DAG.getBoolConstant(*KnownVal, DL, VT, VT);
+ return DAG.getBoolConstant(*KnownVal, DL, VT, N1.getValueType());
}
}
>From c33e17628fdd631db3168b8b2db2c154c897e1cf Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 25 Jul 2025 19:16:06 -0400
Subject: [PATCH 4/5] Simplify more
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 526bdb3e7ee8e..f369136f99ca8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13550,8 +13550,8 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
KnownVal = false;
}
- // If not handled by special cases, use ICmpInst::compare
if (!KnownVal) {
+ bool SupportedPredicate = true;
KnownBits KnownLHS = DAG.computeKnownBits(N0);
// Convert ISD::CondCode to CmpInst::Predicate
@@ -13588,17 +13588,17 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
Pred = CmpInst::ICMP_SGE;
break;
default:
- return SDValue(); // Unsupported predicate
+ SupportedPredicate = false;
+ break;
}
- // Use the same logic as GlobalISel: ICmpInst::compare
- KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
+ if (SupportedPredicate)
+ KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
}
// If the comparison result is known, replace with constant
- if (KnownVal) {
+ if (KnownVal)
return DAG.getBoolConstant(*KnownVal, DL, VT, N1.getValueType());
- }
}
if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
>From 3c397d24c0bc52b68442ce31aa242072a1b39374 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 26 Jul 2025 11:06:02 -0400
Subject: [PATCH 5/5] tests
---
.../aarch64-split-and-bitmask-immediate.ll | 20 ++-----
llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 20 +++++--
...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 7 +--
llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll | 12 ++---
llvm/test/CodeGen/ARM/cmp-peephole.ll | 17 ++----
...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 21 ++------
llvm/test/CodeGen/ARM/select-imm.ll | 10 ++--
llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll | 27 +---------
llvm/test/CodeGen/LoongArch/sextw-removal.ll | 38 ++++++++------
llvm/test/CodeGen/RISCV/pr64935.ll | 5 +-
llvm/test/CodeGen/RISCV/sextw-removal.ll | 4 +-
.../X86/2007-10-12-CoalesceExtSubReg.ll | 15 ++----
llvm/test/CodeGen/X86/apx/or.ll | 52 +++++++++----------
13 files changed, 92 insertions(+), 156 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index 113eb14ca4803..a2d178166bb6e 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -20,10 +20,7 @@ entry:
define i8 @test2(i32 %a) {
; CHECK-LABEL: test2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #135 // =0x87
-; CHECK-NEXT: and w8, w0, w8
-; CHECK-NEXT: cmp w8, #1024
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
%and = and i32 %a, 135
@@ -68,10 +65,7 @@ entry:
define i8 @test5(i64 %a) {
; CHECK-LABEL: test5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: and x8, x0, #0x3ffffc000
-; CHECK-NEXT: and x8, x8, #0xfffffffe00007fff
-; CHECK-NEXT: cmp x8, #1024
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
%and = and i64 %a, 8589950976
@@ -84,10 +78,7 @@ entry:
define i8 @test6(i64 %a) {
; CHECK-LABEL: test6:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #135 // =0x87
-; CHECK-NEXT: and x8, x0, x8
-; CHECK-NEXT: cmp x8, #1024
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
%and = and i64 %a, 135
@@ -252,10 +243,7 @@ entry:
define i8 @test11(i64 %a) {
; CHECK-LABEL: test11:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #-1610612736 // =0xa0000000
-; CHECK-NEXT: and x8, x0, x8
-; CHECK-NEXT: cmp x8, #1024
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
%and = and i64 %a, 2684354560
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 9b22abcc94d3b..06e957fdcc6a2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -597,10 +597,22 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
}
define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; CHECK-LABEL: select_noccmp1:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: mov x0, x3
-; CHECK-NEXT: ret
+; SDISEL-LABEL: select_noccmp1:
+; SDISEL: ; %bb.0:
+; SDISEL-NEXT: cmp x0, #0
+; SDISEL-NEXT: ccmp x0, #13, #4, lt
+; SDISEL-NEXT: cset w8, gt
+; SDISEL-NEXT: cmp x2, #2
+; SDISEL-NEXT: ccmp x2, #4, #4, lt
+; SDISEL-NEXT: csinc w8, w8, wzr, le
+; SDISEL-NEXT: cmp w8, #0
+; SDISEL-NEXT: csel x0, xzr, x3, ne
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: select_noccmp1:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: mov x0, x3
+; GISEL-NEXT: ret
%c0 = icmp slt i64 %v1, 0
%c1 = icmp sgt i64 %v1, 13
%c2 = icmp slt i64 %v3, 2
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 4a73b10811d29..19a2babaffcb2 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -326,12 +326,7 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-128 // =0xffffff80
-; CHECK-NEXT: lsl w8, w8, w1
-; CHECK-NEXT: and w8, w8, w0
-; CHECK-NEXT: and w8, w8, #0x80
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
diff --git a/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll b/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
index 33c5ba7987974..8240ce073a6de 100644
--- a/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
+++ b/llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll
@@ -95,9 +95,7 @@ define i1 @lt64_u64(i64 %0) {
define i1 @lt8_u16_and_5(i8 %0) {
; CHECK-LABEL: lt8_u16_and_5:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
%2 = and i8 %0, 5
%3 = icmp ult i8 %2, 16
@@ -118,9 +116,7 @@ define i1 @lt8_u16_and_19(i8 %0) {
define i1 @lt32_u16_and_7(i32 %0) {
; CHECK-LABEL: lt32_u16_and_7:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
%2 = and i32 %0, 7
%3 = icmp ult i32 %2, 16
@@ -141,9 +137,7 @@ define i1 @lt32_u16_and_21(i32 %0) {
define i1 @lt64_u16_and_9(i64 %0) {
; CHECK-LABEL: lt64_u16_and_9:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
%2 = and i64 %0, 9
%3 = icmp ult i64 %2, 16
diff --git a/llvm/test/CodeGen/ARM/cmp-peephole.ll b/llvm/test/CodeGen/ARM/cmp-peephole.ll
index 73888558e6647..54a34e3f6077d 100644
--- a/llvm/test/CodeGen/ARM/cmp-peephole.ll
+++ b/llvm/test/CodeGen/ARM/cmp-peephole.ll
@@ -137,23 +137,17 @@ define i1 @cmp_ne_zero_or_rr(i32 %a, i32 %b) {
define i1 @cmp_ne_zero_or_ri(i32 %a) {
; ARM-LABEL: cmp_ne_zero_or_ri:
; ARM: @ %bb.0:
-; ARM-NEXT: orrs r0, r0, #42
-; ARM-NEXT: movwne r0, #1
+; ARM-NEXT: mov r0, #1
; ARM-NEXT: bx lr
;
; THUMB-LABEL: cmp_ne_zero_or_ri:
; THUMB: @ %bb.0:
-; THUMB-NEXT: movs r1, #42
-; THUMB-NEXT: orrs r0, r1
-; THUMB-NEXT: subs r1, r0, #1
-; THUMB-NEXT: sbcs r0, r1
+; THUMB-NEXT: movs r0, #1
; THUMB-NEXT: bx lr
;
; THUMB2-LABEL: cmp_ne_zero_or_ri:
; THUMB2: @ %bb.0:
-; THUMB2-NEXT: orrs r0, r0, #42
-; THUMB2-NEXT: it ne
-; THUMB2-NEXT: movne r0, #1
+; THUMB2-NEXT: movs r0, #1
; THUMB2-NEXT: bx lr
%or = or i32 %a, 42
%res = icmp ne i32 %or, 0
@@ -726,10 +720,7 @@ define i1 @cmp_eq_zero_or_ri(i32 %a) {
;
; THUMB-LABEL: cmp_eq_zero_or_ri:
; THUMB: @ %bb.0:
-; THUMB-NEXT: movs r1, #42
-; THUMB-NEXT: orrs r0, r1
-; THUMB-NEXT: rsbs r1, r0, #0
-; THUMB-NEXT: adcs r0, r1
+; THUMB-NEXT: movs r0, #0
; THUMB-NEXT: bx lr
;
; THUMB2-LABEL: cmp_eq_zero_or_ri:
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index a8421ae9a6a89..77dd121b5e797 100644
--- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -1067,23 +1067,10 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
; ARM-NEXT: mov r0, #0
; ARM-NEXT: bx lr
;
-; THUMB6-LABEL: scalar_i8_signbit_eq_with_nonzero:
-; THUMB6: @ %bb.0:
-; THUMB6-NEXT: uxtb r1, r1
-; THUMB6-NEXT: movs r2, #127
-; THUMB6-NEXT: mvns r2, r2
-; THUMB6-NEXT: lsls r2, r1
-; THUMB6-NEXT: ands r2, r0
-; THUMB6-NEXT: uxtb r0, r2
-; THUMB6-NEXT: subs r1, r0, #1
-; THUMB6-NEXT: rsbs r0, r1, #0
-; THUMB6-NEXT: adcs r0, r1
-; THUMB6-NEXT: bx lr
-;
-; THUMB78-LABEL: scalar_i8_signbit_eq_with_nonzero:
-; THUMB78: @ %bb.0:
-; THUMB78-NEXT: movs r0, #0
-; THUMB78-NEXT: bx lr
+; THUMB-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: movs r0, #0
+; THUMB-NEXT: bx lr
%t0 = shl i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp eq i8 %t1, 1 ; should be comparing with 0
diff --git a/llvm/test/CodeGen/ARM/select-imm.ll b/llvm/test/CodeGen/ARM/select-imm.ll
index 186276b50ceeb..3b8170aa16866 100644
--- a/llvm/test/CodeGen/ARM/select-imm.ll
+++ b/llvm/test/CodeGen/ARM/select-imm.ll
@@ -652,13 +652,11 @@ define i1 @t10() {
; V8MBASE-NEXT: .pad #8
; V8MBASE-NEXT: sub sp, #8
; V8MBASE-NEXT: movs r0, #7
-; V8MBASE-NEXT: mvns r1, r0
-; V8MBASE-NEXT: str r1, [sp]
-; V8MBASE-NEXT: adds r0, r1, #5
+; V8MBASE-NEXT: mvns r0, r0
+; V8MBASE-NEXT: str r0, [sp]
+; V8MBASE-NEXT: adds r0, r0, #5
; V8MBASE-NEXT: str r0, [sp, #4]
-; V8MBASE-NEXT: adds r1, #8
-; V8MBASE-NEXT: rsbs r0, r1, #0
-; V8MBASE-NEXT: adcs r0, r1
+; V8MBASE-NEXT: movs r0, #1
; V8MBASE-NEXT: add sp, #8
; V8MBASE-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
index 559bb68741e12..a0a0a19c0322f 100644
--- a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
@@ -10,35 +10,10 @@ define i32 @fred(ptr %a0) #0 {
; CHECK-LABEL: fred:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
-; CHECK-NEXT: if (p0) jump:nt .LBB0_2
-; CHECK-NEXT: }
-; CHECK-NEXT: // %bb.1: // %b2
-; CHECK-NEXT: {
-; CHECK-NEXT: r3:2 = combine(#0,#0)
-; CHECK-NEXT: r1:0 = memd(r0+#0)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: p0 = vcmph.eq(r1:0,r3:2)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r1:0 = mask(p0)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r0 = and(r0,#1)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: p0 = cmp.eq(r0,#11)
-; CHECK-NEXT: r0 = #1
-; CHECK-NEXT: }
-; CHECK-NEXT: {
+; CHECK-NEXT: if (!p0) r0 = #1
; CHECK-NEXT: if (p0) r0 = #0
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
-; CHECK-NEXT: .LBB0_2: // %b14
-; CHECK-NEXT: {
-; CHECK-NEXT: r0 = #0
-; CHECK-NEXT: jumpr r31
-; CHECK-NEXT: }
b0:
switch i32 undef, label %b14 [
i32 5, label %b2
diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
index 0c31ff9eee1f2..b0aebd9f39e9d 100644
--- a/llvm/test/CodeGen/LoongArch/sextw-removal.ll
+++ b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
@@ -326,11 +326,13 @@ declare i64 @llvm.ctpop.i64(i64)
define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $sp, $sp, -32
+; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT: sra.w $a0, $a0, $a1
-; CHECK-NEXT: addi.w $fp, $zero, -256
+; CHECK-NEXT: addi.d $fp, $zero, -256
+; CHECK-NEXT: ori $s0, $zero, 1
; CHECK-NEXT: .p2align 4, , 16
; CHECK-NEXT: .LBB7_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -338,20 +340,23 @@ define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
; CHECK-NEXT: pcaddu18i $ra, %call36(foo)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: or $a0, $a0, $fp
-; CHECK-NEXT: bnez $a0, .LBB7_1
+; CHECK-NEXT: bnez $s0, .LBB7_1
; CHECK-NEXT: # %bb.2: # %bb7
-; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 32
; CHECK-NEXT: ret
;
; NORMV-LABEL: test8:
; NORMV: # %bb.0: # %bb
-; NORMV-NEXT: addi.d $sp, $sp, -16
-; NORMV-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; NORMV-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
+; NORMV-NEXT: addi.d $sp, $sp, -32
+; NORMV-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; NORMV-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; NORMV-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
; NORMV-NEXT: sra.w $a0, $a0, $a1
-; NORMV-NEXT: addi.w $fp, $zero, -256
+; NORMV-NEXT: addi.d $fp, $zero, -256
+; NORMV-NEXT: ori $s0, $zero, 1
; NORMV-NEXT: .p2align 4, , 16
; NORMV-NEXT: .LBB7_1: # %bb2
; NORMV-NEXT: # =>This Inner Loop Header: Depth=1
@@ -359,11 +364,12 @@ define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
; NORMV-NEXT: pcaddu18i $ra, %call36(foo)
; NORMV-NEXT: jirl $ra, $ra, 0
; NORMV-NEXT: or $a0, $a0, $fp
-; NORMV-NEXT: bnez $a0, .LBB7_1
+; NORMV-NEXT: bnez $s0, .LBB7_1
; NORMV-NEXT: # %bb.2: # %bb7
-; NORMV-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
-; NORMV-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; NORMV-NEXT: addi.d $sp, $sp, 16
+; NORMV-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; NORMV-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; NORMV-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; NORMV-NEXT: addi.d $sp, $sp, 32
; NORMV-NEXT: ret
bb:
%i = ashr i32 %arg, %arg1
diff --git a/llvm/test/CodeGen/RISCV/pr64935.ll b/llvm/test/CodeGen/RISCV/pr64935.ll
index 60be5fa6c994e..b712db0dc99d6 100644
--- a/llvm/test/CodeGen/RISCV/pr64935.ll
+++ b/llvm/test/CodeGen/RISCV/pr64935.ll
@@ -4,10 +4,7 @@
define i1 @f() {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: not a0, a0
-; CHECK-NEXT: sltiu a0, a0, 2
-; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: li a0, 1
; CHECK-NEXT: ret
%B25 = shl i64 4294967296, -9223372036854775808
%B13 = sub i64 -1, -9223372036854775808
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index b155feab9b4d9..f048d67fa37f7 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -422,7 +422,7 @@ define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: call foo
; CHECK-NEXT: ori a0, a0, -256
-; CHECK-NEXT: bnez a0, .LBB7_1
+; CHECK-NEXT: j .LBB7_1
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
@@ -438,7 +438,7 @@ define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
; NOREMOVAL-NEXT: sext.w a0, a0
; NOREMOVAL-NEXT: call foo
; NOREMOVAL-NEXT: ori a0, a0, -256
-; NOREMOVAL-NEXT: bnez a0, .LBB7_1
+; NOREMOVAL-NEXT: j .LBB7_1
; NOREMOVAL-NEXT: # %bb.2: # %bb7
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; NOREMOVAL-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index cfb3e508576dd..9c7b76f5ec3ac 100644
--- a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -4,25 +4,18 @@
define signext i16 @f(ptr %bp, ptr %ss) {
; CHECK-LABEL: f:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %cond_next127
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl (%eax), %edx
-; CHECK-NEXT: movl (%ecx), %esi
; CHECK-NEXT: andl $15, %edx
-; CHECK-NEXT: andl $15, %esi
-; CHECK-NEXT: addl %esi, (%ecx)
-; CHECK-NEXT: cmpl $63, %edx
-; CHECK-NEXT: jb .LBB0_1
+; CHECK-NEXT: addl %edx, (%eax)
+; CHECK-NEXT: testb %cl, %cl
+; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
entry:
br label %cond_next127
diff --git a/llvm/test/CodeGen/X86/apx/or.ll b/llvm/test/CodeGen/X86/apx/or.ll
index 514a7d83b78b0..d75d4225d2880 100644
--- a/llvm/test/CodeGen/X86/apx/or.ll
+++ b/llvm/test/CodeGen/X86/apx/or.ll
@@ -621,18 +621,18 @@ define i1 @orflag64rm(ptr %ptr, i64 %b) {
define i1 @orflag8ri(i8 %a) {
; CHECK-LABEL: orflag8ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: orb $-124, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xcf,0x84]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
+; CHECK-NEXT: orb $-124, %dil, %al # encoding: [0x62,0xf4,0x7c,0x18,0x80,0xcf,0x84]
+; CHECK-NEXT: movb %al, d64(%rip) # encoding: [0x88,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64, kind: reloc_riprel_4byte
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag8ri:
; NF: # %bb.0:
-; NF-NEXT: orb $-124, %dil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xcf,0x84]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
+; NF-NEXT: {nf} orb $-124, %dil, %al # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x80,0xcf,0x84]
+; NF-NEXT: movb %al, d64(%rip) # encoding: [0x88,0x05,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64, kind: reloc_riprel_4byte
+; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%xor = xor i8 123, -1
%v0 = or i8 %a, %xor ; 0xff << 50
@@ -644,20 +644,20 @@ define i1 @orflag8ri(i8 %a) {
define i1 @orflag16ri(i16 %a) {
; CHECK-LABEL: orflag16ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: orw $-1235, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xcf,0x2d,0xfb]
+; CHECK-NEXT: orw $-1235, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x81,0xcf,0x2d,0xfb]
; CHECK-NEXT: # imm = 0xFB2D
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movw %ax, d64(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag16ri:
; NF: # %bb.0:
-; NF-NEXT: orw $-1235, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xcf,0x2d,0xfb]
+; NF-NEXT: {nf} orw $-1235, %di, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x81,0xcf,0x2d,0xfb]
; NF-NEXT: # imm = 0xFB2D
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movw %ax, d64(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%xor = xor i16 1234, -1
%v0 = or i16 %a, %xor ; 0xff << 50
@@ -671,18 +671,18 @@ define i1 @orflag32ri(i32 %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: orl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xcf,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64, kind: reloc_riprel_4byte
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32ri:
; NF: # %bb.0:
; NF-NEXT: orl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xcf,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64, kind: reloc_riprel_4byte
+; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, 123456 ; 0xff << 50
%v1 = icmp eq i32 %v0, 0
@@ -695,18 +695,18 @@ define i1 @orflag64ri(i64 %a) {
; CHECK: # %bb.0:
; CHECK-NEXT: orq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xcf,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64ri:
; NF: # %bb.0:
; NF-NEXT: orq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xcf,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, 123456 ; 0xff << 50
%v1 = icmp eq i64 %v0, 0
@@ -717,18 +717,18 @@ define i1 @orflag64ri(i64 %a) {
define i1 @orflag16ri8(i16 %a) {
; CHECK-LABEL: orflag16ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: orw $-124, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xcf,0x84]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: orw $-124, %di, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x83,0xcf,0x84]
+; CHECK-NEXT: movw %ax, d64(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag16ri8:
; NF: # %bb.0:
-; NF-NEXT: orw $-124, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xcf,0x84]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
+; NF-NEXT: {nf} orw $-124, %di, %ax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7d,0x1c,0x83,0xcf,0x84]
+; NF-NEXT: movw %ax, d64(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%xor = xor i16 123, -1
%v0 = or i16 %a, %xor ; 0xff << 50
@@ -741,17 +741,17 @@ define i1 @orflag32ri8(i32 %a) {
; CHECK-LABEL: orflag32ri8:
; CHECK: # %bb.0:
; CHECK-NEXT: orl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xcf,0x7b]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64, kind: reloc_riprel_4byte
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32ri8:
; NF: # %bb.0:
; NF-NEXT: orl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xcf,0x7b]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64, kind: reloc_riprel_4byte
+; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, 123 ; 0xff << 50
%v1 = icmp eq i32 %v0, 0
@@ -763,17 +763,17 @@ define i1 @orflag64ri8(i64 %a) {
; CHECK-LABEL: orflag64ri8:
; CHECK: # %bb.0:
; CHECK-NEXT: orq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xcf,0x7b]
-; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64ri8:
; NF: # %bb.0:
; NF-NEXT: orq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xcf,0x7b]
-; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64, kind: reloc_riprel_4byte
+; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, 123 ; 0xff << 50
%v1 = icmp eq i64 %v0, 0
More information about the llvm-commits
mailing list