[llvm-branch-commits] [llvm] [AMDGPU] Support Wave Reduction for i16 types - 3 (PR #194812)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 5 07:14:24 PDT 2026
https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/194812
>From b0f511321f4c0066d580e31c451a03dd25260a8d Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Tue, 28 Apr 2026 15:26:08 +0530
Subject: [PATCH 1/3] [AMDGPU] Support Wave Reduction for i16 types - 3
Supported Ops: `and`, `or`, `xor`.
Supports only the iterative strategy; DPP is yet
to be supported.
Supports only Fake-16 versions of the lowering.
True-16 support is yet to be added.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 14 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 3 +
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll | 627 +++++++++++----
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll | 627 +++++++++++----
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll | 751 ++++++++++++++----
5 files changed, 1588 insertions(+), 434 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 09211dd31fb84..43d6aa62ce0b7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5689,7 +5689,10 @@ static bool is16bitWaveReduction(unsigned Opc) {
Opc == AMDGPU::WAVE_REDUCE_ADD_PSEUDO_I16 ||
Opc == AMDGPU::WAVE_REDUCE_ADD_PSEUDO_I16_t16 ||
Opc == AMDGPU::WAVE_REDUCE_SUB_PSEUDO_I16_t16 ||
- Opc == AMDGPU::WAVE_REDUCE_SUB_PSEUDO_I16;
+ Opc == AMDGPU::WAVE_REDUCE_SUB_PSEUDO_I16 ||
+ Opc == AMDGPU::WAVE_REDUCE_AND_PSEUDO_B16 ||
+ Opc == AMDGPU::WAVE_REDUCE_OR_PSEUDO_B16 ||
+ Opc == AMDGPU::WAVE_REDUCE_XOR_PSEUDO_B16;
}
static bool is32bitWaveReduceOperation(unsigned Opc) {
@@ -5840,6 +5843,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
case AMDGPU::S_MAX_U32:
case AMDGPU::S_MAX_I32:
case AMDGPU::V_MAX_F32_e64:
+ case AMDGPU::V_AND_B16_fake16_e64:
+ case AMDGPU::V_OR_B16_fake16_e64:
case AMDGPU::S_AND_B32:
case AMDGPU::S_OR_B32: {
// Idempotent operations.
@@ -5862,6 +5867,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
RetBB = &BB;
break;
}
+ case AMDGPU::V_XOR_B16_fake16_e64:
case AMDGPU::S_XOR_B32:
case AMDGPU::S_XOR_B64:
case AMDGPU::S_ADD_I32:
@@ -5891,6 +5897,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
.addReg(ExecMask);
switch (Opc) {
+ case AMDGPU::V_XOR_B16_fake16_e64:
case AMDGPU::S_XOR_B32:
case AMDGPU::S_XOR_B64: {
// Performing an XOR operation on a uniform value
@@ -5904,7 +5911,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
.addReg(NewAccumulator->getOperand(0).getReg())
.addImm(1)
.setOperandDead(3); // Dead scc
- if (Opc == AMDGPU::S_XOR_B32) {
+ if (Opc == AMDGPU::S_XOR_B32 || Opc == AMDGPU::V_XOR_B16_fake16_e64) {
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
.addReg(SrcReg)
.addReg(ParityRegister);
@@ -6802,14 +6809,17 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
ST.getGeneration() >= AMDGPUSubtarget::GFX12
? AMDGPU::V_ADD_F64_pseudo_e64
: AMDGPU::V_ADD_F64_e64);
+ case AMDGPU::WAVE_REDUCE_AND_PSEUDO_B16:
case AMDGPU::WAVE_REDUCE_AND_PSEUDO_B32:
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_AND_B32);
case AMDGPU::WAVE_REDUCE_AND_PSEUDO_B64:
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_AND_B64);
+ case AMDGPU::WAVE_REDUCE_OR_PSEUDO_B16:
case AMDGPU::WAVE_REDUCE_OR_PSEUDO_B32:
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_OR_B32);
case AMDGPU::WAVE_REDUCE_OR_PSEUDO_B64:
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_OR_B64);
+ case AMDGPU::WAVE_REDUCE_XOR_PSEUDO_B16:
case AMDGPU::WAVE_REDUCE_XOR_PSEUDO_B32:
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_XOR_B32);
case AMDGPU::WAVE_REDUCE_XOR_PSEUDO_B64:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1afcb3ecc38ba..8d2432e680a5b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -414,6 +414,9 @@ defvar Operations = [
WaveReduceOp<"max", "I16", i16, SGPR_32, VSrc_b16, NotUseRealTrue16Insts>,
WaveReduceOp<"add", "I16", i16, SGPR_32, VSrc_b16, NotUseRealTrue16Insts>,
WaveReduceOp<"sub", "I16", i16, SGPR_32, VSrc_b16, NotUseRealTrue16Insts>,
+ WaveReduceOp<"and", "B16", i16, SGPR_32, VSrc_b16, NotUseRealTrue16Insts>,
+ WaveReduceOp<"or", "B16", i16, SGPR_32, VSrc_b16, NotUseRealTrue16Insts>,
+ WaveReduceOp<"xor", "B16", i16, SGPR_32, VSrc_b16, NotUseRealTrue16Insts>,
WaveReduceOp<"umin", "U16_t16", i16, SGPR_32, VSrcT_b16, UseRealTrue16Insts>,
WaveReduceOp<"min", "I16_t16", i16, SGPR_32, VSrcT_b16, UseRealTrue16Insts>,
WaveReduceOp<"umax", "U16_t16", i16, SGPR_32, VSrcT_b16, UseRealTrue16Insts>,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index 0774b8b5617cc..576f2a48bca62 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -7,10 +7,360 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1064GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1032DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1032GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL,GFX1164DAGISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL,GFX1164GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL,GFX1132DAGISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL,GFX1132GISEL-FAKE16 %s
+
+define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i16:
+; GFX8DAGISEL: ; %bb.0: ; %entry
+; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8DAGISEL-NEXT: s_endpgm
+;
+; GFX8GISEL-LABEL: uniform_value_i16:
+; GFX8GISEL: ; %bb.0: ; %entry
+; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8GISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8GISEL-NEXT: s_endpgm
+;
+; GFX9DAGISEL-LABEL: uniform_value_i16:
+; GFX9DAGISEL: ; %bb.0: ; %entry
+; GFX9DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT: s_endpgm
+;
+; GFX9GISEL-LABEL: uniform_value_i16:
+; GFX9GISEL: ; %bb.0: ; %entry
+; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: global_store_short v1, v0, s[0:1]
+; GFX9GISEL-NEXT: s_endpgm
+;
+; GFX10DAGISEL-LABEL: uniform_value_i16:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_clause 0x1
+; GFX10DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX10DAGISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT: s_endpgm
+;
+; GFX10GISEL-LABEL: uniform_value_i16:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_clause 0x1
+; GFX10GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX10GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX10GISEL-NEXT: global_store_short v1, v0, s[0:1]
+; GFX10GISEL-NEXT: s_endpgm
+;
+; GFX1164DAGISEL-LABEL: uniform_value_i16:
+; GFX1164DAGISEL: ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT: s_clause 0x1
+; GFX1164DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT: s_endpgm
+;
+; GFX1164GISEL-LABEL: uniform_value_i16:
+; GFX1164GISEL: ; %bb.0: ; %entry
+; GFX1164GISEL-NEXT: s_clause 0x1
+; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1164GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1164GISEL-NEXT: s_endpgm
+;
+; GFX1132DAGISEL-LABEL: uniform_value_i16:
+; GFX1132DAGISEL: ; %bb.0: ; %entry
+; GFX1132DAGISEL-NEXT: s_clause 0x1
+; GFX1132DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT: s_endpgm
+;
+; GFX1132GISEL-LABEL: uniform_value_i16:
+; GFX1132GISEL: ; %bb.0: ; %entry
+; GFX1132GISEL-NEXT: s_clause 0x1
+; GFX1132GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1132GISEL-NEXT: s_endpgm
+ entry:
+ %result = call i16 @llvm.amdgcn.wave.reduce.and.i16(i16 %in, i32 1)
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: divergnet_value_i16:
+; GFX8DAGISEL: ; %bb.0: ; %entry
+; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1
+; GFX8DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX8DAGISEL-NEXT: s_and_b32 s6, s6, s8
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: ; %bb.2:
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX8DAGISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8GISEL-LABEL: divergnet_value_i16:
+; GFX8GISEL: ; %bb.0: ; %entry
+; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT: s_mov_b32 s6, -1
+; GFX8GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX8GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX8GISEL-NEXT: s_and_b32 s6, s6, s8
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: ; %bb.2:
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX8GISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9DAGISEL-LABEL: divergnet_value_i16:
+; GFX9DAGISEL: ; %bb.0: ; %entry
+; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1
+; GFX9DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX9DAGISEL-NEXT: s_and_b32 s6, s6, s8
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: ; %bb.2:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9GISEL-LABEL: divergnet_value_i16:
+; GFX9GISEL: ; %bb.0: ; %entry
+; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT: s_mov_b32 s6, -1
+; GFX9GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX9GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX9GISEL-NEXT: s_and_b32 s6, s6, s8
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: ; %bb.2:
+; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX9GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1064DAGISEL-LABEL: divergnet_value_i16:
+; GFX1064DAGISEL: ; %bb.0: ; %entry
+; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1064DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX1064DAGISEL-NEXT: s_and_b32 s6, s6, s8
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: ; %bb.2:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1064DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1064DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1064GISEL-LABEL: divergnet_value_i16:
+; GFX1064GISEL: ; %bb.0: ; %entry
+; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1064GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s6, -1
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX1064GISEL-NEXT: s_and_b32 s6, s6, s8
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: ; %bb.2:
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1064GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1064GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1032DAGISEL-LABEL: divergnet_value_i16:
+; GFX1032DAGISEL: ; %bb.0: ; %entry
+; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1032DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1032DAGISEL-NEXT: s_mov_b32 s5, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s6, s5
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s7, v2, s6
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s5, s6
+; GFX1032DAGISEL-NEXT: s_and_b32 s4, s4, s7
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: ; %bb.2:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1032DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1032DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1032GISEL-LABEL: divergnet_value_i16:
+; GFX1032GISEL: ; %bb.0: ; %entry
+; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1032GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1032GISEL-NEXT: s_mov_b32 s5, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s4, -1
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s6, s5
+; GFX1032GISEL-NEXT: v_readlane_b32 s7, v2, s6
+; GFX1032GISEL-NEXT: s_bitset0_b32 s5, s6
+; GFX1032GISEL-NEXT: s_and_b32 s4, s4, s7
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: ; %bb.2:
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1032GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1032GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1164DAGISEL-LABEL: divergnet_value_i16:
+; GFX1164DAGISEL: ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT: s_mov_b32 s2, -1
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s3, s[0:1]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[0:1], s3
+; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, s4
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: ; %bb.2:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1164DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1164DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1164GISEL-LABEL: divergnet_value_i16:
+; GFX1164GISEL: ; %bb.0: ; %entry
+; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1164GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s2, -1
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s3, s[0:1]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s3
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[0:1], s3
+; GFX1164GISEL-NEXT: s_and_b32 s2, s2, s4
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: ; %bb.2:
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1164GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1164GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1132DAGISEL-LABEL: divergnet_value_i16:
+; GFX1132DAGISEL: ; %bb.0: ; %entry
+; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1132DAGISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s2, s1
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s3, v2, s2
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s1, s2
+; GFX1132DAGISEL-NEXT: s_and_b32 s0, s0, s3
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: ; %bb.2:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1132DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1132DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1132GISEL-LABEL: divergnet_value_i16:
+; GFX1132GISEL: ; %bb.0: ; %entry
+; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1132GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s0, -1
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s2, s1
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_readlane_b32 s3, v2, s2
+; GFX1132GISEL-NEXT: s_bitset0_b32 s1, s2
+; GFX1132GISEL-NEXT: s_and_b32 s0, s0, s3
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: ; %bb.2:
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1132GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1132GISEL-NEXT: s_setpc_b64 s[30:31]
+ entry:
+ %result = call i16 @llvm.amdgcn.wave.reduce.and.i16(i16 %in, i32 1)
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: uniform_value:
@@ -130,13 +480,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -150,13 +500,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, -1
-; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -171,13 +521,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -189,13 +539,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, -1
-; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -209,13 +559,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -227,13 +577,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -247,13 +597,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1
-; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -265,13 +615,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, -1
-; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -286,14 +636,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -306,14 +656,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -327,14 +677,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
-; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -347,14 +697,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, -1
-; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2012,20 +2362,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX8DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2040,30 +2390,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr2
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s2, s2
-; GFX8GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB8_2: ; %Flow
; GFX8GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s6, -1
-; GFX8GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX8GISEL-NEXT: ; %bb.5:
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8GISEL-NEXT: .LBB6_6: ; %endif
+; GFX8GISEL-NEXT: .LBB8_6: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2086,20 +2436,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX9DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2113,30 +2463,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr2
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s2, s2
-; GFX9GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB8_2: ; %Flow
; GFX9GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s6, -1
-; GFX9GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX9GISEL-NEXT: ; %bb.5:
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9GISEL-NEXT: .LBB6_6: ; %endif
+; GFX9GISEL-NEXT: .LBB8_6: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2158,20 +2508,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX1064DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2185,30 +2535,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr2
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s2, s2
-; GFX1064GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1064GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s6, -1
-; GFX1064GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1064GISEL-NEXT: ; %bb.5:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1064GISEL-NEXT: .LBB8_6: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2230,20 +2580,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1
-; GFX1032DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_and_b32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2257,30 +2607,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr1
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s1, s1
-; GFX1032GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1032GISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s1, -1
-; GFX1032GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_and_b32 s1, s1, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1032GISEL-NEXT: ; %bb.5:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1032GISEL-NEXT: .LBB8_6: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2304,21 +2654,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX1164DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2334,31 +2684,31 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s2, s2
-; GFX1164GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1164GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s6, -1
-; GFX1164GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1164GISEL-NEXT: ; %bb.5:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1164GISEL-NEXT: .LBB8_6: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2382,21 +2732,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1
-; GFX1132DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_and_b32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2412,31 +2762,31 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s1, s1
-; GFX1132GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1132GISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, -1
-; GFX1132GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_and_b32 s1, s1, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1132GISEL-NEXT: ; %bb.5:
; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1132GISEL-NEXT: .LBB8_6: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2574,14 +2924,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2594,14 +2944,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2614,14 +2964,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2634,14 +2984,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2654,14 +3004,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2673,14 +3023,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2692,14 +3042,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2711,14 +3061,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], -1
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2730,7 +3080,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], -1
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -2738,7 +3088,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2750,7 +3100,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], -1
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -2758,7 +3108,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2770,7 +3120,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], -1
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2778,7 +3128,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2789,7 +3139,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], -1
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2797,7 +3147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2838,24 +3188,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB11_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX8GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], s[4:5]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB11_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2890,24 +3240,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB11_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX9GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], s[6:7]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB11_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -2942,24 +3292,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1064GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], s[6:7]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB11_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -2994,24 +3344,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1032GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s2
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], s[6:7]
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB11_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -3050,17 +3400,17 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1164GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -3068,7 +3418,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB11_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@@ -3105,23 +3455,23 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s2
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[4:5], s[4:5]
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB11_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@@ -3144,3 +3494,8 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1132DAGISEL-FAKE16: {{.*}}
+; GFX1132GISEL-FAKE16: {{.*}}
+; GFX1164DAGISEL-FAKE16: {{.*}}
+; GFX1164GISEL-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index 837c9a87e4ae6..46d78bf7d980b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -7,10 +7,360 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1064GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1032DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1032GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL,GFX1164DAGISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL,GFX1164GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL,GFX1132DAGISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL,GFX1132GISEL-FAKE16 %s
+
+define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i16:
+; GFX8DAGISEL: ; %bb.0: ; %entry
+; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8DAGISEL-NEXT: s_endpgm
+;
+; GFX8GISEL-LABEL: uniform_value_i16:
+; GFX8GISEL: ; %bb.0: ; %entry
+; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8GISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8GISEL-NEXT: s_endpgm
+;
+; GFX9DAGISEL-LABEL: uniform_value_i16:
+; GFX9DAGISEL: ; %bb.0: ; %entry
+; GFX9DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT: s_endpgm
+;
+; GFX9GISEL-LABEL: uniform_value_i16:
+; GFX9GISEL: ; %bb.0: ; %entry
+; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: global_store_short v1, v0, s[0:1]
+; GFX9GISEL-NEXT: s_endpgm
+;
+; GFX10DAGISEL-LABEL: uniform_value_i16:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_clause 0x1
+; GFX10DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX10DAGISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT: s_endpgm
+;
+; GFX10GISEL-LABEL: uniform_value_i16:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_clause 0x1
+; GFX10GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX10GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX10GISEL-NEXT: global_store_short v1, v0, s[0:1]
+; GFX10GISEL-NEXT: s_endpgm
+;
+; GFX1164DAGISEL-LABEL: uniform_value_i16:
+; GFX1164DAGISEL: ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT: s_clause 0x1
+; GFX1164DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT: s_endpgm
+;
+; GFX1164GISEL-LABEL: uniform_value_i16:
+; GFX1164GISEL: ; %bb.0: ; %entry
+; GFX1164GISEL-NEXT: s_clause 0x1
+; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1164GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1164GISEL-NEXT: s_endpgm
+;
+; GFX1132DAGISEL-LABEL: uniform_value_i16:
+; GFX1132DAGISEL: ; %bb.0: ; %entry
+; GFX1132DAGISEL-NEXT: s_clause 0x1
+; GFX1132DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT: s_endpgm
+;
+; GFX1132GISEL-LABEL: uniform_value_i16:
+; GFX1132GISEL: ; %bb.0: ; %entry
+; GFX1132GISEL-NEXT: s_clause 0x1
+; GFX1132GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1132GISEL-NEXT: s_endpgm
+ entry:
+ %result = call i16 @llvm.amdgcn.wave.reduce.or.i16(i16 %in, i32 1)
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: divergnet_value_i16:
+; GFX8DAGISEL: ; %bb.0: ; %entry
+; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX8DAGISEL-NEXT: s_or_b32 s6, s6, s8
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: ; %bb.2:
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX8DAGISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8GISEL-LABEL: divergnet_value_i16:
+; GFX8GISEL: ; %bb.0: ; %entry
+; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT: s_mov_b32 s6, 0
+; GFX8GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX8GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX8GISEL-NEXT: s_or_b32 s6, s6, s8
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: ; %bb.2:
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX8GISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9DAGISEL-LABEL: divergnet_value_i16:
+; GFX9DAGISEL: ; %bb.0: ; %entry
+; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX9DAGISEL-NEXT: s_or_b32 s6, s6, s8
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: ; %bb.2:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9GISEL-LABEL: divergnet_value_i16:
+; GFX9GISEL: ; %bb.0: ; %entry
+; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT: s_mov_b32 s6, 0
+; GFX9GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX9GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX9GISEL-NEXT: s_or_b32 s6, s6, s8
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: ; %bb.2:
+; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX9GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1064DAGISEL-LABEL: divergnet_value_i16:
+; GFX1064DAGISEL: ; %bb.0: ; %entry
+; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1064DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX1064DAGISEL-NEXT: s_or_b32 s6, s6, s8
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: ; %bb.2:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1064DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1064DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1064GISEL-LABEL: divergnet_value_i16:
+; GFX1064GISEL: ; %bb.0: ; %entry
+; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1064GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX1064GISEL-NEXT: s_or_b32 s6, s6, s8
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: ; %bb.2:
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1064GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1064GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1032DAGISEL-LABEL: divergnet_value_i16:
+; GFX1032DAGISEL: ; %bb.0: ; %entry
+; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1032DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1032DAGISEL-NEXT: s_mov_b32 s5, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s6, s5
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s7, v2, s6
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s5, s6
+; GFX1032DAGISEL-NEXT: s_or_b32 s4, s4, s7
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: ; %bb.2:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1032DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1032DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1032GISEL-LABEL: divergnet_value_i16:
+; GFX1032GISEL: ; %bb.0: ; %entry
+; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1032GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1032GISEL-NEXT: s_mov_b32 s5, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s6, s5
+; GFX1032GISEL-NEXT: v_readlane_b32 s7, v2, s6
+; GFX1032GISEL-NEXT: s_bitset0_b32 s5, s6
+; GFX1032GISEL-NEXT: s_or_b32 s4, s4, s7
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: ; %bb.2:
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1032GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1032GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1164DAGISEL-LABEL: divergnet_value_i16:
+; GFX1164DAGISEL: ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s3, s[0:1]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[0:1], s3
+; GFX1164DAGISEL-NEXT: s_or_b32 s2, s2, s4
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: ; %bb.2:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1164DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1164DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1164GISEL-LABEL: divergnet_value_i16:
+; GFX1164GISEL: ; %bb.0: ; %entry
+; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1164GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s3, s[0:1]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s3
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[0:1], s3
+; GFX1164GISEL-NEXT: s_or_b32 s2, s2, s4
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: ; %bb.2:
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1164GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1164GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1132DAGISEL-LABEL: divergnet_value_i16:
+; GFX1132DAGISEL: ; %bb.0: ; %entry
+; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1132DAGISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s2, s1
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s3, v2, s2
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s1, s2
+; GFX1132DAGISEL-NEXT: s_or_b32 s0, s0, s3
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: ; %bb.2:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1132DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1132DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1132GISEL-LABEL: divergnet_value_i16:
+; GFX1132GISEL: ; %bb.0: ; %entry
+; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1132GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s2, s1
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_readlane_b32 s3, v2, s2
+; GFX1132GISEL-NEXT: s_bitset0_b32 s1, s2
+; GFX1132GISEL-NEXT: s_or_b32 s0, s0, s3
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: ; %bb.2:
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1132GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1132GISEL-NEXT: s_setpc_b64 s[30:31]
+ entry:
+ %result = call i16 @llvm.amdgcn.wave.reduce.or.i16(i16 %in, i32 1)
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: uniform_value:
@@ -130,13 +480,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -150,13 +500,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -171,13 +521,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -189,13 +539,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -209,13 +559,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -227,13 +577,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -247,13 +597,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -265,13 +615,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -286,14 +636,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -306,14 +656,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -327,14 +677,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -347,14 +697,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2012,20 +2362,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2040,30 +2390,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr2
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s2, s2
-; GFX8GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB8_2: ; %Flow
; GFX8GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s6, 0
-; GFX8GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX8GISEL-NEXT: ; %bb.5:
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8GISEL-NEXT: .LBB6_6: ; %endif
+; GFX8GISEL-NEXT: .LBB8_6: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2086,20 +2436,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2113,30 +2463,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr2
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s2, s2
-; GFX9GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB8_2: ; %Flow
; GFX9GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s6, 0
-; GFX9GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX9GISEL-NEXT: ; %bb.5:
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9GISEL-NEXT: .LBB6_6: ; %endif
+; GFX9GISEL-NEXT: .LBB8_6: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2158,20 +2508,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2185,30 +2535,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr2
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s2, s2
-; GFX1064GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1064GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1064GISEL-NEXT: ; %bb.5:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1064GISEL-NEXT: .LBB8_6: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2230,20 +2580,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_or_b32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2257,30 +2607,30 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr1
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s1, s1
-; GFX1032GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1032GISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_or_b32 s1, s1, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1032GISEL-NEXT: ; %bb.5:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1032GISEL-NEXT: .LBB8_6: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2304,21 +2654,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2334,31 +2684,31 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s2, s2
-; GFX1164GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1164GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1164GISEL-NEXT: ; %bb.5:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1164GISEL-NEXT: .LBB8_6: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2382,21 +2732,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_or_b32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2412,31 +2762,31 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s1, s1
-; GFX1132GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1132GISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_or_b32 s1, s1, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1132GISEL-NEXT: ; %bb.5:
; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1132GISEL-NEXT: .LBB8_6: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2574,14 +2924,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2594,14 +2944,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2614,14 +2964,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2634,14 +2984,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2654,14 +3004,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2673,14 +3023,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2692,14 +3042,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2711,14 +3061,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2730,7 +3080,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -2738,7 +3088,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2750,7 +3100,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -2758,7 +3108,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2770,7 +3120,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2778,7 +3128,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2789,7 +3139,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2797,7 +3147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2839,24 +3189,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB11_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX8GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], s[4:5]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB11_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2891,24 +3241,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB11_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX9GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], s[6:7]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB11_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -2943,24 +3293,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1064GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], s[6:7]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB11_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -2995,24 +3345,24 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1032GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s2
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], s[6:7]
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB11_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -3051,17 +3401,17 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1164GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -3069,7 +3419,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB11_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@@ -3106,23 +3456,23 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s2
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[4:5], s[4:5]
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB11_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@@ -3145,3 +3495,8 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1132DAGISEL-FAKE16: {{.*}}
+; GFX1132GISEL-FAKE16: {{.*}}
+; GFX1164DAGISEL-FAKE16: {{.*}}
+; GFX1164GISEL-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index 6a852f91da521..0857ed23d484f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -7,10 +7,436 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1064GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1032DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1032GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL,GFX1164DAGISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL,GFX1164GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL,GFX1132DAGISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL,GFX1132GISEL-FAKE16 %s
+
+define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i16:
+; GFX8DAGISEL: ; %bb.0: ; %entry
+; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8DAGISEL-NEXT: s_endpgm
+;
+; GFX8GISEL-LABEL: uniform_value_i16:
+; GFX8GISEL: ; %bb.0: ; %entry
+; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT: s_and_b32 s3, 0xffff, s6
+; GFX8GISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8GISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8GISEL-NEXT: s_endpgm
+;
+; GFX9DAGISEL-LABEL: uniform_value_i16:
+; GFX9DAGISEL: ; %bb.0: ; %entry
+; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT: s_endpgm
+;
+; GFX9GISEL-LABEL: uniform_value_i16:
+; GFX9GISEL: ; %bb.0: ; %entry
+; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT: s_and_b32 s3, 0xffff, s6
+; GFX9GISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9GISEL-NEXT: global_store_short v1, v0, s[0:1]
+; GFX9GISEL-NEXT: s_endpgm
+;
+; GFX1064DAGISEL-LABEL: uniform_value_i16:
+; GFX1064DAGISEL: ; %bb.0: ; %entry
+; GFX1064DAGISEL-NEXT: s_clause 0x1
+; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX1064DAGISEL-NEXT: s_endpgm
+;
+; GFX1064GISEL-LABEL: uniform_value_i16:
+; GFX1064GISEL: ; %bb.0: ; %entry
+; GFX1064GISEL-NEXT: s_clause 0x1
+; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1064GISEL-NEXT: s_and_b32 s3, 0xffff, s6
+; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1064GISEL-NEXT: global_store_short v1, v0, s[0:1]
+; GFX1064GISEL-NEXT: s_endpgm
+;
+; GFX1032DAGISEL-LABEL: uniform_value_i16:
+; GFX1032DAGISEL: ; %bb.0: ; %entry
+; GFX1032DAGISEL-NEXT: s_clause 0x1
+; GFX1032DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s3, s3
+; GFX1032DAGISEL-NEXT: s_and_b32 s3, s3, 1
+; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s3
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT: global_store_short v0, v1, s[0:1]
+; GFX1032DAGISEL-NEXT: s_endpgm
+;
+; GFX1032GISEL-LABEL: uniform_value_i16:
+; GFX1032GISEL: ; %bb.0: ; %entry
+; GFX1032GISEL-NEXT: s_clause 0x1
+; GFX1032GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s3, s3
+; GFX1032GISEL-NEXT: s_and_b32 s3, s3, 1
+; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1032GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s3
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032GISEL-NEXT: global_store_short v1, v0, s[0:1]
+; GFX1032GISEL-NEXT: s_endpgm
+;
+; GFX1164DAGISEL-LABEL: uniform_value_i16:
+; GFX1164DAGISEL: ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT: s_clause 0x1
+; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
+; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT: s_endpgm
+;
+; GFX1164GISEL-LABEL: uniform_value_i16:
+; GFX1164GISEL: ; %bb.0: ; %entry
+; GFX1164GISEL-NEXT: s_clause 0x1
+; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
+; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164GISEL-NEXT: s_and_b32 s3, 0xffff, s6
+; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1164GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1164GISEL-NEXT: s_endpgm
+;
+; GFX1132DAGISEL-LABEL: uniform_value_i16:
+; GFX1132DAGISEL: ; %bb.0: ; %entry
+; GFX1132DAGISEL-NEXT: s_clause 0x1
+; GFX1132DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s3, s3
+; GFX1132DAGISEL-NEXT: s_and_b32 s3, s3, 1
+; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s3
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT: s_endpgm
+;
+; GFX1132GISEL-LABEL: uniform_value_i16:
+; GFX1132GISEL: ; %bb.0: ; %entry
+; GFX1132GISEL-NEXT: s_clause 0x1
+; GFX1132GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
+; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s3, s3
+; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: s_and_b32 s3, s3, 1
+; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s3
+; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1132GISEL-NEXT: s_endpgm
+ entry:
+ %result = call i16 @llvm.amdgcn.wave.reduce.xor.i16(i16 %in, i32 1)
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: divergnet_value_i16:
+; GFX8DAGISEL: ; %bb.0: ; %entry
+; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: ; %bb.2:
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX8DAGISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8GISEL-LABEL: divergnet_value_i16:
+; GFX8GISEL: ; %bb.0: ; %entry
+; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT: s_mov_b32 s6, 0
+; GFX8GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX8GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: ; %bb.2:
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX8GISEL-NEXT: flat_store_short v[0:1], v2
+; GFX8GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX8GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9DAGISEL-LABEL: divergnet_value_i16:
+; GFX9DAGISEL: ; %bb.0: ; %entry
+; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: ; %bb.2:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9GISEL-LABEL: divergnet_value_i16:
+; GFX9GISEL: ; %bb.0: ; %entry
+; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT: s_mov_b32 s6, 0
+; GFX9GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX9GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: ; %bb.2:
+; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX9GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX9GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX9GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1064DAGISEL-LABEL: divergnet_value_i16:
+; GFX1064DAGISEL: ; %bb.0: ; %entry
+; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1064DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: ; %bb.2:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1064DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1064DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1064GISEL-LABEL: divergnet_value_i16:
+; GFX1064GISEL: ; %bb.0: ; %entry
+; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1064GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
+; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s7
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[4:5], s7
+; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: ; %bb.2:
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s6
+; GFX1064GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1064GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1032DAGISEL-LABEL: divergnet_value_i16:
+; GFX1032DAGISEL: ; %bb.0: ; %entry
+; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1032DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1032DAGISEL-NEXT: s_mov_b32 s5, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s6, s5
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s7, v2, s6
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s5, s6
+; GFX1032DAGISEL-NEXT: s_xor_b32 s4, s4, s7
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: ; %bb.2:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1032DAGISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1032DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1032GISEL-LABEL: divergnet_value_i16:
+; GFX1032GISEL: ; %bb.0: ; %entry
+; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1032GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1032GISEL-NEXT: s_mov_b32 s5, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s6, s5
+; GFX1032GISEL-NEXT: v_readlane_b32 s7, v2, s6
+; GFX1032GISEL-NEXT: s_bitset0_b32 s5, s6
+; GFX1032GISEL-NEXT: s_xor_b32 s4, s4, s7
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: ; %bb.2:
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
+; GFX1032GISEL-NEXT: global_store_short v[0:1], v2, off
+; GFX1032GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1164DAGISEL-LABEL: divergnet_value_i16:
+; GFX1164DAGISEL: ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s3, s[0:1]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[0:1], s3
+; GFX1164DAGISEL-NEXT: s_xor_b32 s2, s2, s4
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: ; %bb.2:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1164DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1164DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1164GISEL-LABEL: divergnet_value_i16:
+; GFX1164GISEL: ; %bb.0: ; %entry
+; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1164GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s3, s[0:1]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s3
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[0:1], s3
+; GFX1164GISEL-NEXT: s_xor_b32 s2, s2, s4
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: ; %bb.2:
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX1164GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1164GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1132DAGISEL-LABEL: divergnet_value_i16:
+; GFX1132DAGISEL: ; %bb.0: ; %entry
+; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1132DAGISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s2, s1
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s3, v2, s2
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s1, s2
+; GFX1132DAGISEL-NEXT: s_xor_b32 s0, s0, s3
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: ; %bb.2:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1132DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1132DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1132GISEL-LABEL: divergnet_value_i16:
+; GFX1132GISEL: ; %bb.0: ; %entry
+; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1132GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s2, s1
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_readlane_b32 s3, v2, s2
+; GFX1132GISEL-NEXT: s_bitset0_b32 s1, s2
+; GFX1132GISEL-NEXT: s_xor_b32 s0, s0, s3
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: ; %bb.2:
+; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1132GISEL-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1132GISEL-NEXT: s_setpc_b64 s[30:31]
+ entry:
+ %result = call i16 @llvm.amdgcn.wave.reduce.xor.i16(i16 %in, i32 1)
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: uniform_value:
@@ -206,13 +632,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -226,13 +652,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -247,13 +673,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -265,13 +691,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -285,13 +711,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -303,13 +729,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -323,13 +749,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_xor_b32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -341,13 +767,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_xor_b32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -362,14 +788,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -382,14 +808,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_xor_b32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -403,14 +829,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_xor_b32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -423,14 +849,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_xor_b32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2156,7 +2582,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2
; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2165,24 +2591,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX8DAGISEL-NEXT: .LBB6_2: ; %Flow
+; GFX8DAGISEL-NEXT: .LBB8_2: ; %Flow
; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2197,7 +2623,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr2
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2206,24 +2632,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX8GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB8_2: ; %Flow
; GFX8GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s6, 0
-; GFX8GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX8GISEL-NEXT: ; %bb.5:
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8GISEL-NEXT: .LBB6_6: ; %endif
+; GFX8GISEL-NEXT: .LBB8_6: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -2238,7 +2664,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2
; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2247,24 +2673,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX9DAGISEL-NEXT: .LBB6_2: ; %Flow
+; GFX9DAGISEL-NEXT: .LBB8_2: ; %Flow
; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2278,7 +2704,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr2
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2287,24 +2713,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX9GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB8_2: ; %Flow
; GFX9GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s6, 0
-; GFX9GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX9GISEL-NEXT: ; %bb.5:
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9GISEL-NEXT: .LBB6_6: ; %endif
+; GFX9GISEL-NEXT: .LBB8_6: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2318,7 +2744,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2
; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2327,24 +2753,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1064DAGISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1064DAGISEL-NEXT: .LBB8_2: ; %Flow
; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2358,7 +2784,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr2
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2367,24 +2793,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1064GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1064GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1064GISEL-NEXT: ; %bb.5:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1064GISEL-NEXT: .LBB8_6: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2398,7 +2824,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
@@ -2407,24 +2833,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1032DAGISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1032DAGISEL-NEXT: .LBB8_2: ; %Flow
; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_xor_b32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2438,7 +2864,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr1
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
@@ -2447,24 +2873,24 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1032GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1032GISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_xor_b32 s1, s1, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1032GISEL-NEXT: ; %bb.5:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1032GISEL-NEXT: .LBB8_6: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2480,7 +2906,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2490,25 +2916,25 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1164DAGISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1164DAGISEL-NEXT: .LBB8_2: ; %Flow
; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2524,7 +2950,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -2534,25 +2960,25 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1164GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1164GISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164GISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1164GISEL-NEXT: ; %bb.5:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1164GISEL-NEXT: .LBB8_6: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2568,7 +2994,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
@@ -2578,25 +3004,25 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1132DAGISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1132DAGISEL-NEXT: .LBB8_2: ; %Flow
; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_xor_b32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB6_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB8_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2612,7 +3038,7 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB6_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB8_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
@@ -2622,25 +3048,25 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1132GISEL-NEXT: .LBB6_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB8_2: ; %Flow
; GFX1132GISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB6_6
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB8_6
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132GISEL-NEXT: .LBB6_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB8_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_xor_b32 s1, s1, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB6_4
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_4
; GFX1132GISEL-NEXT: ; %bb.5:
; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132GISEL-NEXT: .LBB6_6: ; %endif
+; GFX1132GISEL-NEXT: .LBB8_6: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -2862,14 +3288,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2882,14 +3308,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2902,14 +3328,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2922,14 +3348,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2942,14 +3368,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2961,14 +3387,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2980,14 +3406,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2999,14 +3425,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -3018,7 +3444,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -3026,7 +3452,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -3038,7 +3464,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -3046,7 +3472,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -3058,7 +3484,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -3066,7 +3492,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -3077,7 +3503,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -3085,7 +3511,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -3105,7 +3531,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -3113,7 +3539,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX8DAGISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8DAGISEL-NEXT: .LBB11_2: ; %Flow
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -3141,7 +3567,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -3149,13 +3575,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX8GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB11_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX8GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -3166,7 +3592,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_mul_i32 s5, s5, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB11_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -3181,7 +3607,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
@@ -3189,7 +3615,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5
; GFX9DAGISEL-NEXT: s_mul_i32 s5, s3, s5
-; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9DAGISEL-NEXT: .LBB11_2: ; %Flow
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -3216,7 +3642,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -3224,13 +3650,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX9GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB11_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX9GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -3241,7 +3667,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_mul_i32 s5, s7, s5
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB11_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -3256,7 +3682,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
@@ -3264,7 +3690,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s2, s5
; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s3, s5
-; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064DAGISEL-NEXT: .LBB11_2: ; %Flow
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -3291,7 +3717,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -3299,13 +3725,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1064GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1064GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -3316,7 +3742,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_mul_i32 s5, s7, s5
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB11_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -3331,7 +3757,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
@@ -3339,7 +3765,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s5
; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s3, s5
-; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032DAGISEL-NEXT: .LBB11_2: ; %Flow
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -3366,7 +3792,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6
@@ -3374,13 +3800,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1032GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1032GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s2
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -3391,7 +3817,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_mul_i32 s5, s7, s3
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB11_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -3408,7 +3834,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1164DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1164DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -3417,7 +3843,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1164DAGISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164DAGISEL-NEXT: .LBB11_2: ; %Flow
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -3448,7 +3874,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -3457,13 +3883,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1164GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX1164GISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -3475,7 +3901,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_mul_i32 s5, s5, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB11_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@@ -3492,7 +3918,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -3501,7 +3927,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1132DAGISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132DAGISEL-NEXT: .LBB11_2: ; %Flow
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
@@ -3531,7 +3957,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB11_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -3540,12 +3966,12 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1132GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB11_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s2
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB11_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -3557,7 +3983,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_mul_i32 s5, s5, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB11_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
@@ -3580,3 +4006,8 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1132DAGISEL-FAKE16: {{.*}}
+; GFX1132GISEL-FAKE16: {{.*}}
+; GFX1164DAGISEL-FAKE16: {{.*}}
+; GFX1164GISEL-FAKE16: {{.*}}
>From 86a91353c5dcb705da40e534bb0d1a669408c4e7 Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Mon, 4 May 2026 14:20:25 +0530
Subject: [PATCH 2/3] Use only SALU opcodes
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +----
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll | 26 +++++++++----------
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll | 26 +++++++++----------
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll | 26 +++++++++----------
4 files changed, 40 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 43d6aa62ce0b7..caf2ffaf9ea3e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5843,8 +5843,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
case AMDGPU::S_MAX_U32:
case AMDGPU::S_MAX_I32:
case AMDGPU::V_MAX_F32_e64:
- case AMDGPU::V_AND_B16_fake16_e64:
- case AMDGPU::V_OR_B16_fake16_e64:
case AMDGPU::S_AND_B32:
case AMDGPU::S_OR_B32: {
// Idempotent operations.
@@ -5867,7 +5865,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
RetBB = &BB;
break;
}
- case AMDGPU::V_XOR_B16_fake16_e64:
case AMDGPU::S_XOR_B32:
case AMDGPU::S_XOR_B64:
case AMDGPU::S_ADD_I32:
@@ -5897,7 +5894,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
.addReg(ExecMask);
switch (Opc) {
- case AMDGPU::V_XOR_B16_fake16_e64:
case AMDGPU::S_XOR_B32:
case AMDGPU::S_XOR_B64: {
// Performing an XOR operation on a uniform value
@@ -5911,7 +5907,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
.addReg(NewAccumulator->getOperand(0).getReg())
.addImm(1)
.setOperandDead(3); // Dead scc
- if (Opc == AMDGPU::S_XOR_B32 || Opc == AMDGPU::V_XOR_B16_fake16_e64) {
+ if (Opc == AMDGPU::S_XOR_B32) {
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
.addReg(SrcReg)
.addReg(ParityRegister);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index 576f2a48bca62..b1ad231e5b5d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -132,8 +132,8 @@ define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
ret void
}
-define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
-; GFX8DAGISEL-LABEL: divergnet_value_i16:
+define void @divergent_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: divergent_value_i16:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -152,7 +152,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8GISEL-LABEL: divergnet_value_i16:
+; GFX8GISEL-LABEL: divergent_value_i16:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -171,7 +171,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9DAGISEL-LABEL: divergnet_value_i16:
+; GFX9DAGISEL-LABEL: divergent_value_i16:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -190,7 +190,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9GISEL-LABEL: divergnet_value_i16:
+; GFX9GISEL-LABEL: divergent_value_i16:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -209,7 +209,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1064DAGISEL-LABEL: divergnet_value_i16:
+; GFX1064DAGISEL-LABEL: divergent_value_i16:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -227,7 +227,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1064DAGISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1064DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1064GISEL-LABEL: divergnet_value_i16:
+; GFX1064GISEL-LABEL: divergent_value_i16:
; GFX1064GISEL: ; %bb.0: ; %entry
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -245,7 +245,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1064GISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1064GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1032DAGISEL-LABEL: divergnet_value_i16:
+; GFX1032DAGISEL-LABEL: divergent_value_i16:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -263,7 +263,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1032DAGISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1032DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1032GISEL-LABEL: divergnet_value_i16:
+; GFX1032GISEL-LABEL: divergent_value_i16:
; GFX1032GISEL: ; %bb.0: ; %entry
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -281,7 +281,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1032GISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1032GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1164DAGISEL-LABEL: divergnet_value_i16:
+; GFX1164DAGISEL-LABEL: divergent_value_i16:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -300,7 +300,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1164DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1164DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1164GISEL-LABEL: divergnet_value_i16:
+; GFX1164GISEL-LABEL: divergent_value_i16:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -319,7 +319,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1164GISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1164GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1132DAGISEL-LABEL: divergnet_value_i16:
+; GFX1132DAGISEL-LABEL: divergent_value_i16:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -338,7 +338,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1132DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1132DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1132GISEL-LABEL: divergnet_value_i16:
+; GFX1132GISEL-LABEL: divergent_value_i16:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index 46d78bf7d980b..67cc39cf22d5c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -132,8 +132,8 @@ define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
ret void
}
-define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
-; GFX8DAGISEL-LABEL: divergnet_value_i16:
+define void @divergent_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: divergent_value_i16:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -152,7 +152,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8GISEL-LABEL: divergnet_value_i16:
+; GFX8GISEL-LABEL: divergent_value_i16:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -171,7 +171,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9DAGISEL-LABEL: divergnet_value_i16:
+; GFX9DAGISEL-LABEL: divergent_value_i16:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -190,7 +190,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9GISEL-LABEL: divergnet_value_i16:
+; GFX9GISEL-LABEL: divergent_value_i16:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -209,7 +209,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1064DAGISEL-LABEL: divergnet_value_i16:
+; GFX1064DAGISEL-LABEL: divergent_value_i16:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -227,7 +227,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1064DAGISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1064DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1064GISEL-LABEL: divergnet_value_i16:
+; GFX1064GISEL-LABEL: divergent_value_i16:
; GFX1064GISEL: ; %bb.0: ; %entry
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -245,7 +245,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1064GISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1064GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1032DAGISEL-LABEL: divergnet_value_i16:
+; GFX1032DAGISEL-LABEL: divergent_value_i16:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -263,7 +263,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1032DAGISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1032DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1032GISEL-LABEL: divergnet_value_i16:
+; GFX1032GISEL-LABEL: divergent_value_i16:
; GFX1032GISEL: ; %bb.0: ; %entry
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -281,7 +281,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1032GISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1032GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1164DAGISEL-LABEL: divergnet_value_i16:
+; GFX1164DAGISEL-LABEL: divergent_value_i16:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -300,7 +300,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1164DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1164DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1164GISEL-LABEL: divergnet_value_i16:
+; GFX1164GISEL-LABEL: divergent_value_i16:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -319,7 +319,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1164GISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1164GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1132DAGISEL-LABEL: divergnet_value_i16:
+; GFX1132DAGISEL-LABEL: divergent_value_i16:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -338,7 +338,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1132DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1132DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1132GISEL-LABEL: divergnet_value_i16:
+; GFX1132GISEL-LABEL: divergent_value_i16:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index 0857ed23d484f..87579fedd3cc1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -208,8 +208,8 @@ define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
ret void
}
-define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
-; GFX8DAGISEL-LABEL: divergnet_value_i16:
+define void @divergent_value_i16(ptr addrspace(1) %out, i16 %in) {
+; GFX8DAGISEL-LABEL: divergent_value_i16:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -228,7 +228,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX8GISEL-LABEL: divergnet_value_i16:
+; GFX8GISEL-LABEL: divergent_value_i16:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -247,7 +247,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9DAGISEL-LABEL: divergnet_value_i16:
+; GFX9DAGISEL-LABEL: divergent_value_i16:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -266,7 +266,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9GISEL-LABEL: divergnet_value_i16:
+; GFX9GISEL-LABEL: divergent_value_i16:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -285,7 +285,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1064DAGISEL-LABEL: divergnet_value_i16:
+; GFX1064DAGISEL-LABEL: divergent_value_i16:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -303,7 +303,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1064DAGISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1064DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1064GISEL-LABEL: divergnet_value_i16:
+; GFX1064GISEL-LABEL: divergent_value_i16:
; GFX1064GISEL: ; %bb.0: ; %entry
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1064GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -321,7 +321,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1064GISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1064GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1032DAGISEL-LABEL: divergnet_value_i16:
+; GFX1032DAGISEL-LABEL: divergent_value_i16:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -339,7 +339,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1032DAGISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1032DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1032GISEL-LABEL: divergnet_value_i16:
+; GFX1032GISEL-LABEL: divergent_value_i16:
; GFX1032GISEL: ; %bb.0: ; %entry
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1032GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -357,7 +357,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1032GISEL-NEXT: global_store_short v[0:1], v2, off
; GFX1032GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1164DAGISEL-LABEL: divergnet_value_i16:
+; GFX1164DAGISEL-LABEL: divergent_value_i16:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -376,7 +376,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1164DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1164DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1164GISEL-LABEL: divergnet_value_i16:
+; GFX1164GISEL-LABEL: divergent_value_i16:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1164GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -395,7 +395,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1164GISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1164GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1132DAGISEL-LABEL: divergnet_value_i16:
+; GFX1132DAGISEL-LABEL: divergent_value_i16:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
@@ -414,7 +414,7 @@ define void @divergnet_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX1132DAGISEL-NEXT: global_store_b16 v[0:1], v2, off
; GFX1132DAGISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1132GISEL-LABEL: divergnet_value_i16:
+; GFX1132GISEL-LABEL: divergent_value_i16:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1132GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
>From 1d3454a31c30a6376d8debe874288dd0dde0437c Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Mon, 4 May 2026 21:07:49 +0530
Subject: [PATCH 3/3] Drop unused test prefixes
---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll | 13 ++++---------
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll | 13 ++++---------
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll | 13 ++++---------
3 files changed, 12 insertions(+), 27 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index b1ad231e5b5d6..e13183b767e09 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -7,10 +7,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1064GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1032DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1032GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL,GFX1164DAGISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL,GFX1164GISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL,GFX1132DAGISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL,GFX1132GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL %s
define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8DAGISEL-LABEL: uniform_value_i16:
@@ -3494,8 +3494,3 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1132DAGISEL-FAKE16: {{.*}}
-; GFX1132GISEL-FAKE16: {{.*}}
-; GFX1164DAGISEL-FAKE16: {{.*}}
-; GFX1164GISEL-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index 67cc39cf22d5c..6f17259da2f12 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -7,10 +7,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1064GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1032DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1032GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL,GFX1164DAGISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL,GFX1164GISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL,GFX1132DAGISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL,GFX1132GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL %s
define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8DAGISEL-LABEL: uniform_value_i16:
@@ -3495,8 +3495,3 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1132DAGISEL-FAKE16: {{.*}}
-; GFX1132GISEL-FAKE16: {{.*}}
-; GFX1164DAGISEL-FAKE16: {{.*}}
-; GFX1164GISEL-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index 87579fedd3cc1..86488b51a18ba 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -7,10 +7,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1064GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1032DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1032GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL,GFX1164DAGISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL,GFX1164GISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL,GFX1132DAGISEL-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL,GFX1132GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164DAGISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX1164GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1132DAGISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX1132GISEL %s
define amdgpu_kernel void @uniform_value_i16(ptr addrspace(1) %out, i16 %in) {
; GFX8DAGISEL-LABEL: uniform_value_i16:
@@ -4006,8 +4006,3 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1132DAGISEL-FAKE16: {{.*}}
-; GFX1132GISEL-FAKE16: {{.*}}
-; GFX1164DAGISEL-FAKE16: {{.*}}
-; GFX1164GISEL-FAKE16: {{.*}}
More information about the llvm-branch-commits
mailing list