[llvm] 846a360 - AMDGPU: Don't run AMDGPUAttributor with -O0
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 8 04:52:44 PDT 2023
Author: Matt Arsenault
Date: 2023-06-08T07:52:37-04:00
New Revision: 846a360e1698f817317f2c454f61a9cfb075cb68
URL: https://github.com/llvm/llvm-project/commit/846a360e1698f817317f2c454f61a9cfb075cb68
DIFF: https://github.com/llvm/llvm-project/commit/846a360e1698f817317f2c454f61a9cfb075cb68.diff
LOG: AMDGPU: Don't run AMDGPUAttributor with -O0
Added:
llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
llvm/test/CodeGen/AMDGPU/spill-m0.ll
llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a48e984ae0908..c76e78803a812 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1058,7 +1058,8 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
if (RemoveIncompatibleFunctions)
  addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM));
-  addPass(createAMDGPUAttributorPass());
+  if (TM->getOptLevel() > CodeGenOpt::None)
+    addPass(createAMDGPUAttributorPass());
// FIXME: This pass adds 2 hacky attributes that can be replaced with an
// analysis, and should be removed.
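
The guard above follows the usual legacy pass-manager pattern of gating optional IR passes on the optimization level. As a rough standalone sketch of that pattern (the enum and pipeline type here are illustrative stand-ins, not LLVM's actual CodeGenOpt::Level or TargetPassConfig):

#include <cstdio>
#include <functional>
#include <vector>

// Stand-in for CodeGenOpt::Level; None corresponds to -O0.
enum class OptLevel { None, Less, Default, Aggressive };

struct Pipeline {
  OptLevel Level;
  std::vector<std::function<void()>> Passes;

  void addPass(std::function<void()> P) { Passes.push_back(std::move(P)); }

  // Mirrors the patched addCodeGenPrepare(): the attributor-style pass is
  // queued only when optimizations are enabled, so -O0 skips it entirely.
  void addCodeGenPrepare() {
    if (Level > OptLevel::None)
      addPass([] { std::puts("attribute inference"); });
  }
};

int main() {
  Pipeline AtO0{OptLevel::None, {}}, AtO2{OptLevel::Default, {}};
  AtO0.addCodeGenPrepare();
  AtO2.addCodeGenPrepare();
  // Prints: -O0 queued 0 pass(es), -O2 queued 1 pass(es)
  std::printf("-O0 queued %zu pass(es), -O2 queued %zu pass(es)\n",
              AtO0.Passes.size(), AtO2.Passes.size());
}
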
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
index 9247eca4b6754..0df80d67e7715 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll
@@ -7,7 +7,7 @@
define amdgpu_kernel void @stack_write_fi() {
; CHECK-LABEL: stack_write_fi:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_mov_b32 s4, 0
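
(The register churn in this and most of the modified tests below is a direct consequence of the change: at -O0 the attributor no longer proves implicit kernel arguments unused, so kernels conservatively preload all of them. The larger user-SGPR block is why, in the checks, the kernarg pointer moves from s[0:1] to s[4:5] and the scratch setup reads the wave offset from s17 instead of s7.)
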
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
index 25f4478702e82..e9292f4e34dcd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA %s
; HSA-LABEL: name: default_kernel
; HSA: liveins:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
index 3ce609676b8bf..6b2e6d8dfdb39 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
+; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounwind {
; HSA-VI-LABEL: name: i8_arg
@@ -1427,7 +1427,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out
; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4)
; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
@@ -1441,7 +1441,7 @@ define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out
; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
@@ -1462,7 +1462,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou
; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4)
; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
@@ -1476,7 +1476,7 @@ define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %ou
; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
@@ -1500,7 +1500,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou
; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
- ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
@@ -1517,7 +1517,7 @@ define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %ou
; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
- ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
@@ -1541,7 +1541,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %
; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
- ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4)
; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
@@ -1558,7 +1558,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %
; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
- ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
@@ -1582,7 +1582,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu
; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
- ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
@@ -1599,7 +1599,7 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocaptu
; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
- ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
@@ -1623,7 +1623,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
- ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4)
; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
@@ -1640,7 +1640,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
- ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
@@ -1731,7 +1731,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu
; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
- ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
+ ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6)
; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg
@@ -1745,7 +1745,7 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocaptu
; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
- ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
%in = load i32, ptr addrspace(6) %in.byref
@@ -1838,8 +1838,8 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu
; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
- ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
- ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4)
; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
@@ -1859,8 +1859,8 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocaptu
; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
- ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4)
- ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
@@ -1882,7 +1882,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref
; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+ ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; HSA-VI-NEXT: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0
@@ -1893,7 +1893,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref
; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4)
+ ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
%in = load i32, ptr addrspace(4) %in.byref
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
index e5c11381865f5..86bad7b844488 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll
@@ -30,8 +30,8 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) {
; CHECK-NEXT: bb.2.atomicrmw.start:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %16(s64), %bb.2, [[C1]](s64), %bb.1
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2
; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
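
(The virtual-register renumbering in the MIR checks of this and the following irtranslator tests, %0 becoming %8, %8 becoming %16, and so on, together with the new $sgpr8_sgpr9 livein and its COPY, has the same cause: without the attributor the implicit arguments are conservatively kept at -O0, so their values are created first and shift the numbering of everything after them.)
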
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
index 1d5490bec296f..21e280e9ba559 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll
@@ -7,6 +7,9 @@
define amdgpu_kernel void @constant_fold_vector_add() {
; CHECK-LABEL: name: constant_fold_vector_add
; CHECK: bb.1.entry:
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index 1df11d1441ee5..985819c2a2687 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -4,6 +4,9 @@
define amdgpu_kernel void @asm_convergent() convergent{
; CHECK-LABEL: name: asm_convergent
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !0
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0
@@ -13,6 +16,9 @@ define amdgpu_kernel void @asm_convergent() convergent{
define amdgpu_kernel void @asm_simple_memory_clobber() {
; CHECK-LABEL: name: asm_simple_memory_clobber
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !0
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !0
; CHECK-NEXT: S_ENDPGM 0
@@ -24,6 +30,9 @@ define amdgpu_kernel void @asm_simple_memory_clobber() {
define amdgpu_kernel void @asm_simple_vgpr_clobber() {
; CHECK-LABEL: name: asm_simple_vgpr_clobber
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !0
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0
@@ -33,6 +42,9 @@ define amdgpu_kernel void @asm_simple_vgpr_clobber() {
define amdgpu_kernel void @asm_simple_sgpr_clobber() {
; CHECK-LABEL: name: asm_simple_sgpr_clobber
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !0
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0
@@ -42,6 +54,9 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() {
define amdgpu_kernel void @asm_simple_agpr_clobber() {
; CHECK-LABEL: name: asm_simple_agpr_clobber
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !0
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0
@@ -51,9 +66,9 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
define i32 @asm_vgpr_early_clobber() {
; CHECK-LABEL: name: asm_vgpr_early_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -79,8 +94,8 @@ entry:
define i32 @test_single_vgpr_output() nounwind {
; CHECK-LABEL: name: test_single_vgpr_output
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -91,8 +106,8 @@ entry:
define i32 @test_single_sgpr_output_s32() nounwind {
; CHECK-LABEL: name: test_single_sgpr_output_s32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -104,9 +119,9 @@ entry:
define float @test_multiple_register_outputs_same() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_same
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0, 1769482 /* regdef:VGPR_32 */, def %1
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %8, 1769482 /* regdef:VGPR_32 */, def %9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
@@ -121,9 +136,9 @@ define float @test_multiple_register_outputs_same() #0 {
define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_mixed
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0, 3080202 /* regdef:VReg_64 */, def %1
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %8, 3080202 /* regdef:VReg_64 */, def %9
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -151,9 +166,12 @@ define float @test_vector_output() nounwind {
define amdgpu_kernel void @test_input_vgpr_imm() {
; CHECK-LABEL: name: test_input_vgpr_imm
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[C]](s32)
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:VGPR_32 */, [[COPY]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32)
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:VGPR_32 */, [[COPY1]]
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42)
ret void
@@ -162,9 +180,12 @@ define amdgpu_kernel void @test_input_vgpr_imm() {
define amdgpu_kernel void @test_input_sgpr_imm() {
; CHECK-LABEL: name: test_input_sgpr_imm
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[C]](s32)
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:SReg_32 */, [[COPY]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32)
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:SReg_32 */, [[COPY1]]
; CHECK-NEXT: S_ENDPGM 0
call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42)
ret void
@@ -173,6 +194,9 @@ define amdgpu_kernel void @test_input_sgpr_imm() {
define amdgpu_kernel void @test_input_imm() {
; CHECK-LABEL: name: test_input_imm
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42
; CHECK-NEXT: INLINEASM &"s_mov_b64 s[0:1], $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42
; CHECK-NEXT: S_ENDPGM 0
@@ -188,8 +212,8 @@ define float @test_input_vgpr(i32 %src) nounwind {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
- ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 1769481 /* reguse:VGPR_32 */, [[COPY1]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %9, 1769481 /* reguse:VGPR_32 */, [[COPY1]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -203,8 +227,8 @@ define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind {
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 262158 /* mem:m */, [[COPY]](p3)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+ ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1769482 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a)
@@ -220,8 +244,8 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
- ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3
+ ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%and = and i32 %a, 1
@@ -232,14 +256,14 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
define i32 @test_sgpr_matching_constraint() nounwind {
; CHECK-LABEL: name: test_sgpr_matching_constraint
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %4, 1900553 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4
+ ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %12, 1900553 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12
; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -261,10 +285,10 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
- ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 1769482 /* regdef:VGPR_32 */, def %4, 1769482 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5
+ ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %11, 1769482 /* regdef:VGPR_32 */, def %12, 1769482 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13
; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
@@ -282,11 +306,11 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
+ ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
entry:
@@ -298,6 +322,9 @@ entry:
define amdgpu_kernel void @asm_constraint_n_n() {
; CHECK-LABEL: name: asm_constraint_n_n
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: INLINEASM &"s_trap ${0:n}", 1 /* sideeffect attdialect */, 13 /* imm */, 10
; CHECK-NEXT: S_ENDPGM 0
tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll b/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
new file mode 100644
index 0000000000000..d838846355462
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=NOOPT %s
+
+; Check that AMDGPUAttributor is not run with -O0.
+; OPT: .amdhsa_user_sgpr_private_segment_buffer 1
+; OPT: .amdhsa_user_sgpr_dispatch_ptr 0
+; OPT: .amdhsa_user_sgpr_queue_ptr 0
+; OPT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; OPT: .amdhsa_user_sgpr_dispatch_id 0
+; OPT: .amdhsa_user_sgpr_flat_scratch_init 0
+; OPT: .amdhsa_user_sgpr_private_segment_size 0
+; OPT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; OPT: .amdhsa_system_sgpr_workgroup_id_x 1
+; OPT: .amdhsa_system_sgpr_workgroup_id_y 0
+; OPT: .amdhsa_system_sgpr_workgroup_id_z 0
+; OPT: .amdhsa_system_sgpr_workgroup_info 0
+; OPT: .amdhsa_system_vgpr_workitem_id 0
+
+; NOOPT: .amdhsa_user_sgpr_private_segment_buffer 1
+; NOOPT: .amdhsa_user_sgpr_dispatch_ptr 1
+; NOOPT: .amdhsa_user_sgpr_queue_ptr 1
+; NOOPT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+; NOOPT: .amdhsa_user_sgpr_dispatch_id 1
+; NOOPT: .amdhsa_user_sgpr_flat_scratch_init 0
+; NOOPT: .amdhsa_user_sgpr_private_segment_size 0
+; NOOPT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
+; NOOPT: .amdhsa_system_sgpr_workgroup_id_x 1
+; NOOPT: .amdhsa_system_sgpr_workgroup_id_y 1
+; NOOPT: .amdhsa_system_sgpr_workgroup_id_z 1
+; NOOPT: .amdhsa_system_sgpr_workgroup_info 0
+; NOOPT: .amdhsa_system_vgpr_workitem_id 2
+define amdgpu_kernel void @foo() {
+ ret void
+}
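
At -O1 and above the difference comes from the attributor annotating the kernel's unused implicit inputs, which is what lets codegen disable most of the user and system registers in the OPT checks. Roughly, the trivial kernel ends up looking like the following hand-written sketch (illustrative only; the exact attribute set the pass infers may differ):

define amdgpu_kernel void @foo() #0 {
  ret void
}

; "Kernel never uses this input" markers; codegen leaves the matching
; preloaded registers disabled.
attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

At -O0 nothing is inferred, so the NOOPT checks show the conservative defaults: dispatch, queue, and kernarg pointers plus all workgroup and workitem IDs enabled.
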
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index 8b6fcc74e5f04..c9cd55eb33ea8 100644
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -30,10 +30,10 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind {
;
; GCN_DBG-LABEL: test_loop:
; GCN_DBG: ; %bb.0: ; %entry
-; GCN_DBG-NEXT: s_load_dword s2, s[0:1], 0x9
+; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_writelane_b32 v0, s2, 0
-; GCN_DBG-NEXT: s_load_dword s1, s[0:1], 0xa
+; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT: s_load_dword s1, s[4:5], 0xa
; GCN_DBG-NEXT: s_mov_b32 s0, 0
; GCN_DBG-NEXT: s_mov_b32 s2, -1
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
@@ -103,13 +103,13 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi
; GCN-NEXT: ds_write_b32 v0, v1
; GCN-NEXT: s_add_i32 s0, s0, 4
; GCN-NEXT: s_mov_b64 vcc, vcc
-; GCN-NEXT: s_cbranch_vccnz .LBB1_1
+; GCN-NEXT: s_cbranch_vccnz .LBB1_1
; GCN-NEXT: ; %bb.2: ; %DummyReturnBlock
; GCN-NEXT: s_endpgm
;
; GCN_DBG-LABEL: loop_const_true:
; GCN_DBG: ; %bb.0: ; %entry
-; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9
+; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
; GCN_DBG-NEXT: s_mov_b32 s0, 0
@@ -174,7 +174,7 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw
;
; GCN_DBG-LABEL: loop_const_false:
; GCN_DBG: ; %bb.0: ; %entry
-; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9
+; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
; GCN_DBG-NEXT: s_mov_b32 s0, 0
@@ -240,7 +240,7 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw
;
; GCN_DBG-LABEL: loop_const_undef:
; GCN_DBG: ; %bb.0: ; %entry
-; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9
+; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
; GCN_DBG-NEXT: s_mov_b32 s0, 0
@@ -318,7 +318,7 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
;
; GCN_DBG-LABEL: loop_arg_0:
; GCN_DBG: ; %bb.0: ; %entry
-; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9
+; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
; GCN_DBG-NEXT: v_mov_b32_e32 v1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
new file mode 100644
index 0000000000000..4760ddb65bf2d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
@@ -0,0 +1,22 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s
+; RUN: not llc --crash -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s
+
+; AMDGPUAttributor deletes the function "by accident" so it's never
+; codegened with optimizations.
+
+; OPT: .text
+; OPT-NEXT: .section ".note.GNU-stack"
+; OPT-NEXT: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
+; OPT-NEXT: .amdgpu_metadata
+; OPT-NEXT: ---
+; OPT-NEXT: amdhsa.kernels: []
+; OPT-NEXT: amdhsa.target: amdgcn-amd-amdhsa--gfx900
+; OPT-NEXT: amdhsa.version:
+; OPT-NEXT: - 1
+; OPT-NEXT: - 1
+; OPT-NEXT: ...
+define internal i32 @func() {
+ ret i32 0
+}
+
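With optimizations, the attributor deletes the unreferenced internal function and llc emits an empty module, which is what the OPT checks describe. At -O0 the function now survives to codegen, and the second RUN line documents the current behavior there: llc is expected to crash on it (hence REQUIRES: asserts and not llc --crash).
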
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index a5edc2ea19362..189b0c3af3a31 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -42,18 +42,18 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
;
; GCN-O0-LABEL: simple_nested_if:
; GCN-O0: ; %bb.0: ; %bb
-; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN-O0-NEXT: s_mov_b32 s10, -1
-; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
-; GCN-O0-NEXT: s_add_u32 s8, s8, s3
-; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
-; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-O0-NEXT: s_mov_b32 s14, -1
+; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
+; GCN-O0-NEXT: s_add_u32 s12, s12, s11
+; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
+; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
@@ -63,7 +63,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB0_4
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
@@ -90,7 +90,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_cbranch_execz .LBB0_3
; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
@@ -187,18 +187,18 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
;
; GCN-O0-LABEL: uncollapsable_nested_if:
; GCN-O0: ; %bb.0: ; %bb
-; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN-O0-NEXT: s_mov_b32 s10, -1
-; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
-; GCN-O0-NEXT: s_add_u32 s8, s8, s3
-; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
-; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-O0-NEXT: s_mov_b32 s14, -1
+; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
+; GCN-O0-NEXT: s_add_u32 s12, s12, s11
+; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
+; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
@@ -208,7 +208,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB1_3
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s4, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s5, v1, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
@@ -235,7 +235,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_cbranch_execz .LBB1_4
; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
@@ -261,7 +261,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_branch .LBB1_5
; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s2, v1, 4
; GCN-O0-NEXT: v_readlane_b32 s3, v1, 5
; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -367,19 +367,19 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
;
; GCN-O0-LABEL: nested_if_if_else:
; GCN-O0: ; %bb.0: ; %bb
-; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN-O0-NEXT: s_mov_b32 s10, -1
-; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
-; GCN-O0-NEXT: s_add_u32 s8, s8, s3
-; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
-; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-O0-NEXT: s_mov_b32 s14, -1
+; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
+; GCN-O0-NEXT: s_add_u32 s12, s12, s11
+; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
+; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[0:1]
; GCN-O0-NEXT: v_writelane_b32 v1, s2, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s3, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
@@ -405,7 +405,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB2_6
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 2
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v0, s0
@@ -428,7 +428,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_cbranch_execz .LBB2_5
; GCN-O0-NEXT: ; %bb.3: ; %bb.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
@@ -449,7 +449,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_branch .LBB2_5
; GCN-O0-NEXT: .LBB2_4: ; %bb.else
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
@@ -573,15 +573,15 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
;
; GCN-O0-LABEL: nested_if_else_if:
; GCN-O0: ; %bb.0: ; %bb
-; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN-O0-NEXT: s_mov_b32 s10, -1
-; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
-; GCN-O0-NEXT: s_add_u32 s8, s8, s3
-; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
-; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-O0-NEXT: s_mov_b32 s14, -1
+; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
+; GCN-O0-NEXT: s_add_u32 s12, s12, s11
+; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
+; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: ; implicit-def: $sgpr0
; GCN-O0-NEXT: v_mov_b32_e32 v4, 0
@@ -601,9 +601,9 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: v_addc_u32_e64 v2, s[2:3], v2, v6, s[2:3]
; GCN-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v6, v2
-; GCN-O0-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
@@ -632,9 +632,9 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_cbranch_execz .LBB3_8
; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: s_mov_b32 s4, s2
@@ -656,8 +656,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_cbranch_execz .LBB3_7
; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: s_mov_b32 s4, s2
@@ -671,9 +671,9 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 offset:8
; GCN-O0-NEXT: s_branch .LBB3_7
; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: s_mov_b32 s2, s0
@@ -695,8 +695,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_cbranch_execz .LBB3_6
; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: s_mov_b32 s4, s2
@@ -783,18 +783,18 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
;
; GCN-O0-LABEL: s_endpgm_unsafe_barrier:
; GCN-O0: ; %bb.0: ; %bb
-; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN-O0-NEXT: s_mov_b32 s10, -1
-; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000
-; GCN-O0-NEXT: s_add_u32 s8, s8, s3
-; GCN-O0-NEXT: s_addc_u32 s9, s9, 0
-; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-O0-NEXT: s_mov_b32 s14, -1
+; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
+; GCN-O0-NEXT: s_add_u32 s12, s12, s11
+; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
+; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0
; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1
; GCN-O0-NEXT: v_mov_b32_e32 v2, v0
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
@@ -805,7 +805,7 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_cbranch_execz .LBB4_2
; GCN-O0-NEXT: ; %bb.1: ; %bb.then
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
index b876f1ac9706a..62fdfdea6ad1b 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
@@ -43,9 +43,9 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
; GCN-LABEL: name: uniform_opt_lshr_and_cmp
; GCN: bb.0.entry:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; GCN-NEXT: liveins: $sgpr0_sgpr1
+ ; GCN-NEXT: liveins: $sgpr4_sgpr5
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4)
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index 35f07759d5bf1..b03d705b43e97 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -11,10 +11,10 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-LABEL: name: extract_w_offset_vgpr
; GCN: bb.0.entry:
; GCN-NEXT: successors: %bb.1(0x80000000)
- ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1
; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440
@@ -56,22 +56,22 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr2
; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr1
; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr0
- ; GCN-NEXT: undef %28.sub0:vreg_512 = COPY [[COPY1]]
- ; GCN-NEXT: %28.sub1:vreg_512 = COPY [[COPY2]]
- ; GCN-NEXT: %28.sub2:vreg_512 = COPY [[COPY3]]
- ; GCN-NEXT: %28.sub3:vreg_512 = COPY [[COPY4]]
- ; GCN-NEXT: %28.sub4:vreg_512 = COPY [[COPY5]]
- ; GCN-NEXT: %28.sub5:vreg_512 = COPY [[COPY6]]
- ; GCN-NEXT: %28.sub6:vreg_512 = COPY [[COPY7]]
- ; GCN-NEXT: %28.sub7:vreg_512 = COPY [[COPY8]]
- ; GCN-NEXT: %28.sub8:vreg_512 = COPY [[COPY9]]
- ; GCN-NEXT: %28.sub9:vreg_512 = COPY [[COPY10]]
- ; GCN-NEXT: %28.sub10:vreg_512 = COPY [[COPY11]]
- ; GCN-NEXT: %28.sub11:vreg_512 = COPY [[COPY12]]
- ; GCN-NEXT: %28.sub12:vreg_512 = COPY [[COPY13]]
- ; GCN-NEXT: %28.sub13:vreg_512 = COPY [[COPY14]]
- ; GCN-NEXT: %28.sub14:vreg_512 = COPY [[COPY15]]
- ; GCN-NEXT: %28.sub15:vreg_512 = COPY [[COPY16]]
+ ; GCN-NEXT: undef %35.sub0:vreg_512 = COPY [[COPY1]]
+ ; GCN-NEXT: %35.sub1:vreg_512 = COPY [[COPY2]]
+ ; GCN-NEXT: %35.sub2:vreg_512 = COPY [[COPY3]]
+ ; GCN-NEXT: %35.sub3:vreg_512 = COPY [[COPY4]]
+ ; GCN-NEXT: %35.sub4:vreg_512 = COPY [[COPY5]]
+ ; GCN-NEXT: %35.sub5:vreg_512 = COPY [[COPY6]]
+ ; GCN-NEXT: %35.sub6:vreg_512 = COPY [[COPY7]]
+ ; GCN-NEXT: %35.sub7:vreg_512 = COPY [[COPY8]]
+ ; GCN-NEXT: %35.sub8:vreg_512 = COPY [[COPY9]]
+ ; GCN-NEXT: %35.sub9:vreg_512 = COPY [[COPY10]]
+ ; GCN-NEXT: %35.sub10:vreg_512 = COPY [[COPY11]]
+ ; GCN-NEXT: %35.sub11:vreg_512 = COPY [[COPY12]]
+ ; GCN-NEXT: %35.sub12:vreg_512 = COPY [[COPY13]]
+ ; GCN-NEXT: %35.sub13:vreg_512 = COPY [[COPY14]]
+ ; GCN-NEXT: %35.sub14:vreg_512 = COPY [[COPY15]]
+ ; GCN-NEXT: %35.sub15:vreg_512 = COPY [[COPY16]]
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -81,12 +81,12 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
- ; GCN-NEXT: dead %45:vgpr_32 = COPY [[DEF]]
+ ; GCN-NEXT: dead [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 %28, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]]
+ ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 %35, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]]
; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 2699899845b3c..20282ff2992b9 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -51,9 +51,6 @@
; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O0-NEXT: Expand reduction intrinsics
; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions
-; GCN-O0-NEXT: AMDGPU Attributor
-; GCN-O0-NEXT: FunctionPass Manager
-; GCN-O0-NEXT: Cycle Info Analysis
; GCN-O0-NEXT: CallGraph Construction
; GCN-O0-NEXT: Call Graph SCC Pass Manager
; GCN-O0-NEXT: AMDGPU Annotate Kernel Features
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
index 5a49314691aaa..bb6a3ccfca1d4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}test_debug_value:
; NOOPT: .loc 1 1 42 prologue_end ; /tmp/test_debug_value.cl:1:42
-; NOOPT-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; NOOPT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; NOOPT-NEXT: .Ltmp
; NOOPT-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- $sgpr4_sgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
index 50c27d1835c9f..74c47b31a3bff 100644
--- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
@@ -27,23 +27,23 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-LABEL: csr_vgpr_spill_fp_callee:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s6, s33
+; CHECK-NEXT: s_mov_b32 s18, s33
; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
; CHECK-NEXT: s_add_i32 s32, s32, 0x400
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v1, s30, 0
; CHECK-NEXT: v_writelane_b32 v1, s31, 1
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, callee_has_fp@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, callee_has_fp@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; clobber csr v40
; CHECK-NEXT: ;;#ASMEND
@@ -54,7 +54,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
-; CHECK-NEXT: s_mov_b32 s33, s6
+; CHECK-NEXT: s_mov_b32 s33, s18
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
@@ -67,18 +67,28 @@ define amdgpu_kernel void @kernel_call() {
; CHECK-LABEL: kernel_call:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_mov_b32 s32, 0
-; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7
-; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, csr_vgpr_spill_fp_callee@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, csr_vgpr_spill_fp_callee@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v3, s16, 0
+; CHECK-NEXT: s_mov_b32 s13, s15
+; CHECK-NEXT: s_mov_b32 s12, s14
+; CHECK-NEXT: v_readlane_b32 s14, v3, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, csr_vgpr_spill_fp_callee@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, csr_vgpr_spill_fp_callee@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b32 s15, 20
+; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
+; CHECK-NEXT: s_mov_b32 s15, 10
+; CHECK-NEXT: v_lshlrev_b32_e64 v1, s15, v1
+; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
+; CHECK-NEXT: ; implicit-def: $sgpr15
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
bb:
tail call fastcc void @csr_vgpr_spill_fp_callee()
@@ -90,23 +100,23 @@ define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() #0 {
; CHECK-LABEL: csr_vgpr_spill_fp_tailcall_callee:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v1, s33, 0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; clobber csr v40
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, callee_has_fp@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, callee_has_fp@rel32@hi+12
; CHECK-NEXT: v_readlane_b32 s33, v1, 0
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[18:19], -1
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_setpc_b64 s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[18:19]
+; CHECK-NEXT: s_setpc_b64 s[16:17]
bb:
call void asm sideeffect "; clobber csr v40", "~{v40}"()
tail call fastcc void @callee_has_fp()
@@ -117,18 +127,28 @@ define amdgpu_kernel void @kernel_tailcall() {
; CHECK-LABEL: kernel_tailcall:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_mov_b32 s32, 0
-; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7
-; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, csr_vgpr_spill_fp_tailcall_callee@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, csr_vgpr_spill_fp_tailcall_callee@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v3, s16, 0
+; CHECK-NEXT: s_mov_b32 s13, s15
+; CHECK-NEXT: s_mov_b32 s12, s14
+; CHECK-NEXT: v_readlane_b32 s14, v3, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, csr_vgpr_spill_fp_tailcall_callee@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, csr_vgpr_spill_fp_tailcall_callee@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b32 s15, 20
+; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
+; CHECK-NEXT: s_mov_b32 s15, 10
+; CHECK-NEXT: v_lshlrev_b32_e64 v1, s15, v1
+; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
+; CHECK-NEXT: ; implicit-def: $sgpr15
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
bb:
tail call fastcc void @csr_vgpr_spill_fp_tailcall_callee()
@@ -152,29 +172,29 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 {
; CHECK-LABEL: caller_save_vgpr_spill_fp_tail_call:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s6, s33
+; CHECK-NEXT: s_mov_b32 s18, s33
; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
; CHECK-NEXT: s_add_i32 s32, s32, 0x400
; CHECK-NEXT: v_writelane_b32 v1, s30, 0
; CHECK-NEXT: v_writelane_b32 v1, s31, 1
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, tail_call@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, tail_call@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, tail_call@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, tail_call@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_readlane_b32 s31, v1, 1
; CHECK-NEXT: v_readlane_b32 s30, v1, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
-; CHECK-NEXT: s_mov_b32 s33, s6
+; CHECK-NEXT: s_mov_b32 s33, s18
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -186,29 +206,29 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 {
; CHECK-LABEL: caller_save_vgpr_spill_fp:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s7, s33
+; CHECK-NEXT: s_mov_b32 s19, s33
; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
; CHECK-NEXT: s_add_i32 s32, s32, 0x400
; CHECK-NEXT: v_writelane_b32 v2, s30, 0
; CHECK-NEXT: v_writelane_b32 v2, s31, 1
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, caller_save_vgpr_spill_fp_tail_call@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, caller_save_vgpr_spill_fp_tail_call@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, caller_save_vgpr_spill_fp_tail_call@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, caller_save_vgpr_spill_fp_tail_call@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_readlane_b32 s31, v2, 1
; CHECK-NEXT: v_readlane_b32 s30, v2, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
-; CHECK-NEXT: s_mov_b32 s33, s7
+; CHECK-NEXT: s_mov_b32 s33, s19
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -220,18 +240,28 @@ define protected amdgpu_kernel void @kernel() {
; CHECK-LABEL: kernel:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b32 s32, 0
-; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7
-; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, caller_save_vgpr_spill_fp@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, caller_save_vgpr_spill_fp@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v3, s16, 0
+; CHECK-NEXT: s_mov_b32 s13, s15
+; CHECK-NEXT: s_mov_b32 s12, s14
+; CHECK-NEXT: v_readlane_b32 s14, v3, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, caller_save_vgpr_spill_fp@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, caller_save_vgpr_spill_fp@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b32 s15, 20
+; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
+; CHECK-NEXT: s_mov_b32 s15, 10
+; CHECK-NEXT: v_lshlrev_b32_e64 v1, s15, v1
+; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
+; CHECK-NEXT: ; implicit-def: $sgpr15
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
entry:
%call = call i32 @caller_save_vgpr_spill_fp()
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index 54229988f2ee4..1a3f42b115a8d 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -11,7 +11,7 @@
define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) #0 {
; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
@@ -442,7 +442,7 @@ ret:
define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) #1 {
; GCN-LABEL: split_sgpr_spill_2_vgprs:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
@@ -667,9 +667,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s54, -1
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
-; GCN-NEXT: s_add_u32 s52, s52, s3
+; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
@@ -902,9 +902,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s54, -1
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
-; GCN-NEXT: s_add_u32 s52, s52, s3
+; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
+; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
index 16aadade906e9..96f0287fb7779 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -7,9 +7,9 @@
define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
; GCN-LABEL: partial_no_vgprs_last_sgpr_spill:
; GCN: ; %bb.0:
-; GCN-NEXT: s_add_u32 s0, s0, s7
+; GCN-NEXT: s_add_u32 s0, s0, s15
; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: s_load_dword s4, s[4:5], 0x2
+; GCN-NEXT: s_load_dword s4, s[8:9], 0x2
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index 92da6309c16ba..c147acdafb4a5 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -16,11 +16,11 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s6, s33
+; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: s_add_i32 s32, s32, 0x7400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
@@ -135,21 +135,23 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v255, s30, 0
; GCN-NEXT: v_writelane_b32 v255, s31, 1
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:444
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: s_mov_b64 s[10:11], s[2:3]
-; GCN-NEXT: s_mov_b64 s[8:9], s[0:1]
-; GCN-NEXT: s_mov_b64 s[0:1], s[8:9]
-; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; GCN-NEXT: s_mov_b64 s[22:23], s[2:3]
+; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
+; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v255, 1
; GCN-NEXT: v_readlane_b32 s30, v255, 0
; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 ; 4-byte Folded Reload
@@ -264,10 +266,10 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
-; GCN-NEXT: s_mov_b32 s33, s6
+; GCN-NEXT: s_mov_b32 s33, s18
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, align 4, addrspace(5)
@@ -308,11 +310,11 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-LABEL: spill_to_lowest_available_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s6, s33
+; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: s_add_i32 s32, s32, 0x7400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
@@ -426,21 +428,23 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v254, s30, 0
; GCN-NEXT: v_writelane_b32 v254, s31, 1
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:440
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: s_mov_b64 s[10:11], s[2:3]
-; GCN-NEXT: s_mov_b64 s[8:9], s[0:1]
-; GCN-NEXT: s_mov_b64 s[0:1], s[8:9]
-; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; GCN-NEXT: s_mov_b64 s[22:23], s[2:3]
+; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
+; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v254, 1
; GCN-NEXT: v_readlane_b32 s30, v254, 0
; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 ; 4-byte Folded Reload
@@ -554,10 +558,10 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
-; GCN-NEXT: s_mov_b32 s33, s6
+; GCN-NEXT: s_mov_b32 s33, s18
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, align 4, addrspace(5)
@@ -1000,15 +1004,17 @@ define void @spill_sgpr_with_tail_call() #0 {
; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:444
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@@ -1121,7 +1127,7 @@ define void @spill_sgpr_with_tail_call() #0 {
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[16:17]
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -1506,7 +1512,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 {
; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s6, s33
+; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_i32 s32, s32, 0x7400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
@@ -1621,30 +1627,30 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 {
; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 s[4:5], exec
+; GCN-NEXT: s_mov_b64 s[16:17], exec
; GCN-NEXT: s_mov_b64 exec, 1
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456
; GCN-NEXT: v_writelane_b32 v0, s30, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: s_mov_b64 s[4:5], exec
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
+; GCN-NEXT: s_mov_b64 s[16:17], exec
; GCN-NEXT: s_mov_b64 exec, 1
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456
; GCN-NEXT: v_writelane_b32 v0, s31, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function_ipra@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function_ipra@rel32@hi+12
-; GCN-NEXT: s_mov_b64 s[10:11], s[2:3]
-; GCN-NEXT: s_mov_b64 s[8:9], s[0:1]
-; GCN-NEXT: s_mov_b64 s[0:1], s[8:9]
-; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
-; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function_ipra@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function_ipra@rel32@hi+12
+; GCN-NEXT: s_mov_b64 s[22:23], s[2:3]
+; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
+; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: s_mov_b64 exec, 1
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456
@@ -1776,7 +1782,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 {
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
-; GCN-NEXT: s_mov_b32 s33, s6
+; GCN-NEXT: s_mov_b32 s33, s18
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
call void @child_function_ipra()
@@ -2049,10 +2055,10 @@ define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 {
; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra_tail_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function_ipra_tail_call@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function_ipra_tail_call@rel32@hi+12
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function_ipra_tail_call@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function_ipra_tail_call@rel32@hi+12
+; GCN-NEXT: s_setpc_b64 s[16:17]
tail call void @child_function_ipra_tail_call()
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
index a55b165035b84..ec8e18a246b75 100644
--- a/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
@@ -9,7 +9,7 @@
define amdgpu_kernel void @test_sopk_size(i32 %var.mode) {
; GFX10-LABEL: test_sopk_size:
; GFX10: ; %bb.0:
-; GFX10: s_load_b32 s0, s[0:1], 0x0
+; GFX10: s_load_b32 s0, s[4:5], 0x0
; GFX10: s_mov_b32 s1, 3
; GFX10: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s1
; GFX10: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index 9dcb7d247f889..33aee16e38864 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -80,7 +80,7 @@ endif: ; preds = %else, %if
; Force save and restore of m0 during SMEM spill
; GCN-LABEL: {{^}}m0_unavailable_spill:
-; GCN: s_load_dword [[REG0:s[0-9]+]], s[0:1], {{0x[0-9]+}}
+; GCN: s_load_dword [[REG0:s[0-9]+]], s[4:5], {{0x[0-9]+}}
; GCN: ; def m0, 1
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
index f31262d969376..240d6a17f6288 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -11,7 +11,7 @@
define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
; CHECK-LABEL: __omp_offloading_16_dd2df_main_l9:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, v0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index 9625416763b9a..667002509c04b 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -105,22 +105,24 @@ define hidden i32 @called(i32 %a) noinline {
; GFX9-LABEL: {{^}}call:
define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]]
-; GFX9-O0-DAG: s_mov_b32 s4, 0{{$}}
-; GFX9-O0-DAG: v_mov_b32_e32 v2, [[ARG]]
+; GFX9-O0-DAG: s_mov_b32 s3, 0{{$}}
+; GFX9-O0-DAG: v_mov_b32_e32 v{{[0-9]+}}, [[ARG]]
; GFX9-O3: v_mov_b32_e32 v2, [[ARG]]
; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3
; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_not_b64 exec, exec
%tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
-; GFX9: v_mov_b32_e32 v0, v2
+; GFX9-O0: v_mov_b32_e32 v0, v6
+; GFX9-O3: v_mov_b32_e32 v0, v2
; GFX9: s_swappc_b64
%tmp134 = call i32 @called(i32 %tmp107)
-; GFX9: v_mov_b32_e32 v1, v0
+; GFX9-O3: v_mov_b32_e32 v1, v0
; GFX9-O3: v_add_u32_e32 v1, v1, v2
-; GFX9-O0: v_add_u32_e64 v1, v1, v2
+; GFX9-O0: v_mov_b32_e32 v3, v0
+; GFX9-O0: v_add_u32_e64 v3, v3, v6
%tmp136 = add i32 %tmp134, %tmp107
%tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
; GFX9: buffer_store_dword v0
@@ -298,22 +300,24 @@ define hidden i32 @strict_wwm_called(i32 %a) noinline {
; GFX9-LABEL: {{^}}strict_wwm_call:
define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]]
-; GFX9-O0-DAG: s_mov_b32 s4, 0{{$}}
-; GFX9-O0-DAG: v_mov_b32_e32 v2, [[ARG]]
+; GFX9-O0-DAG: s_mov_b32 s3, 0{{$}}
+; GFX9-O0-DAG: v_mov_b32_e32 v6, [[ARG]]
; GFX9-O3: v_mov_b32_e32 v2, [[ARG]]
; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3
; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_not_b64 exec, exec
%tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
-; GFX9: v_mov_b32_e32 v0, v2
+; GFX9-O3: v_mov_b32_e32 v0, v2
+; GFX9-O0: v_mov_b32_e32 v0, v6
; GFX9: s_swappc_b64
%tmp134 = call i32 @strict_wwm_called(i32 %tmp107)
-; GFX9: v_mov_b32_e32 v1, v0
+; GFX9-O3: v_mov_b32_e32 v1, v0
; GFX9-O3: v_add_u32_e32 v1, v1, v2
-; GFX9-O0: v_add_u32_e64 v1, v1, v2
+; GFX9-O0: v_mov_b32_e32 v3, v0
+; GFX9-O0: v_add_u32_e64 v3, v3, v6
%tmp136 = add i32 %tmp134, %tmp107
%tmp137 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp136)
; GFX9: buffer_store_dword v0