[llvm] 3bc1e08 - AMDGPU: Created a subclass for the return address operand in the tail call return instruction
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 10 10:54:25 PDT 2023
Author: Changpeng Fang
Date: 2023-04-10T10:53:33-07:00
New Revision: 3bc1e084eef09dcc7e5b77f70e904b2edb27791c
URL: https://github.com/llvm/llvm-project/commit/3bc1e084eef09dcc7e5b77f70e904b2edb27791c
DIFF: https://github.com/llvm/llvm-project/commit/3bc1e084eef09dcc7e5b77f70e904b2edb27791c.diff
LOG: AMDGPU: Created a subclass for the return address operand in the tail call return instruction
Summary:
This is to avoid using the callee saved registers for the return address
of the tail call return instruction.
Reviewers:
arsenm, cdevadas
Differential Revision:
https://reviews.llvm.org/D147096
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 209c90d2cb620..7cb8a95c983ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2486,7 +2486,8 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
else
MIB.addGlobalAddress(GV, Offset + 12, GAFlags + 1);
- B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
+ if (!B.getMRI()->getRegClassOrNull(PCReg))
+ B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
if (PtrTy.getSizeInBits() == 32)
B.buildExtract(DstReg, PCReg, 0);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0a33c9b9360f6..df11c839e51ac 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -600,7 +600,7 @@ def SI_CALL : SPseudoInstSI <
// Tail call handling pseudo
def SI_TCRETURN : SPseudoInstSI <(outs),
- (ins SReg_64:$src0, unknown:$callee, i32imm:$fp
diff ),
+ (ins CCR_SGPR_64:$src0, unknown:$callee, i32imm:$fp
diff ),
[(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fp
diff )]> {
let Size = 4;
let FixedSize = 1;
@@ -617,7 +617,7 @@ def SI_TCRETURN : SPseudoInstSI <(outs),
// Handle selecting indirect tail calls
def : GCNPat<
(AMDGPUtc_return i64:$src0, (i64 0), (i32 timm:$fp
diff )),
- (SI_TCRETURN SReg_64:$src0, (i64 0), i32imm:$fp
diff )
+ (SI_TCRETURN CCR_SGPR_64:$src0, (i64 0), i32imm:$fp
diff )
>;
def ADJCALLSTACKUP : SPseudoInstSI<
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 245de1a90286a..060f63158b53c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -817,6 +817,14 @@ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16],
let HasSGPR = 1;
}
+// CCR (call clobbered registers) SGPR 64-bit registers, currently used for tail call
+// return address only.
+def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, (add (trunc SGPR_64, 16))> {
+ let CopyCost = SGPR_64.CopyCost;
+ let AllocationPriority = SGPR_64.AllocationPriority;
+ let HasSGPR = SGPR_64.HasSGPR;
+}
+
def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
index 55318eae4d72f..15d03cad021cd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -183,7 +183,7 @@ define ptr addrspace(1) @tail_call_assert_align() {
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 967979e76dd8a..7653a53e82131 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -46,7 +46,7 @@ define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -70,7 +70,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b,
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -97,7 +97,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -119,7 +119,7 @@ define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -250,7 +250,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `ptr addrspace(5) inttoptr (i32 16 to ptr addrspace(5))`, align 16, addrspace 5)
@@ -360,7 +360,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
@@ -453,7 +453,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
@@ -586,7 +586,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
+ ; GCN-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32)
@@ -648,7 +648,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
@@ -753,7 +753,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
; GCN-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
@@ -931,7 +931,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-NEXT: G_STORE [[C1]](s64), [[FRAME_INDEX35]](p5) :: (store (s64) into %ir.alloca1, addrspace 5)
; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32)
; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1095,7 +1095,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed
; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1181,7 +1181,7 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 {
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1230,7 +1230,7 @@ define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspa
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1278,7 +1278,7 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1325,7 +1325,7 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1375,7 +1375,7 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1427,7 +1427,7 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
@@ -1480,7 +1480,7 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64
+ ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
index b9cdcff95ed1e..edcb4622d5d7e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
@@ -17,7 +17,7 @@ define void @tail_call_void_func_void() {
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @external_void_func_void
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
index 814228c163333..67e745a8a8386 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -8,15 +8,15 @@
define amdgpu_kernel void @s_input_output_i128() {
; GFX908-LABEL: name: s_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6750218 /* regdef:SGPR_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6946826 /* regdef:SGPR_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6750217 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6946825 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: s_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6750218 /* regdef:SGPR_128 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6946826 /* regdef:SGPR_128 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6750217 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6946825 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=s"()
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
define amdgpu_kernel void @v_input_output_i128() {
; GFX908-LABEL: name: v_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:VReg_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5898250 /* regdef:VReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5701641 /* reguse:VReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5898249 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: v_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:VReg_128_Align2 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:VReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=v"()
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
define amdgpu_kernel void @a_input_output_i128() {
; GFX908-LABEL: name: a_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5636106 /* regdef:AReg_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:AReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5636105 /* reguse:AReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5832713 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: a_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5898250 /* regdef:AReg_128_Align2 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:AReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5898249 /* reguse:AReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = call i128 asm sideeffect "; def $0", "=a"()
call void asm sideeffect "; use $0", "a"(i128 %val)
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 785f8ce151e82..2450a989ecb8f 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX908-NEXT: {{ $}}
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:VReg_128 */, def %26
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5898250 /* regdef:VReg_128 */, def %26
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def %23
; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -35,7 +35,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5898250 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
@@ -58,7 +58,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX90A-NEXT: {{ $}}
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:VReg_128_Align2 */, def %25
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128_Align2 */, def %25
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def %23
; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -80,7 +80,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0
- ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
+ ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
More information about the llvm-commits
mailing list