[llvm] [WIP][AMDGPU] Fix emitting illegal COPY (PR #131752)
Pankaj Dwivedi via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 04:26:49 PDT 2025
https://github.com/PankajDwivedi-25 updated https://github.com/llvm/llvm-project/pull/131752
>From 52e2176863e7aee618f18f6acffd4d7143ac2a0f Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Tue, 18 Mar 2025 13:30:20 +0530
Subject: [PATCH 1/3] Fix emitting illegal COPY
---
llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll | 25 ++++++++++++++++++++
1 file changed, 25 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
new file mode 100644
index 0000000000000..6b14a660f580d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; XFAIL: *
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN
+
+define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
+ %sign = fpext bfloat %sign.bf16 to float
+ %op = call float @llvm.copysign.f32(float %mag, float %sign)
+ %cast = bitcast float %op to i32
+ ret i32 %cast
+}
+
+; define i32 @s_copysign_f32_bf16(float %mag, bfloat %sign.bf16) {
+; %sign = fpext bfloat %sign.bf16 to float
+; %op = call float @llvm.copysign.f32(float %mag, float %sign)
+; %cast = bitcast float %op to i32
+; ret i32 %cast
+; }
+
+; define i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
+; %sign = fpext bfloat %sign.bf16 to float
+; %op = call float @llvm.copysign.f32(float %mag, float %sign)
+; %cast = bitcast float %op to i32
+; %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
+; ret i32 %readlane
+; }
\ No newline at end of file
>From 487fdd13bc7af9fc28d7a35edb22007cb16b2258 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Mon, 24 Mar 2025 23:00:35 +0530
Subject: [PATCH 2/3] replace VGPR to SGPR COPY with readfirstlane
---
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 49 +++++
...l-args-inreg-no-sgpr-for-csrspill-xfail.ll | 186 +++++++++++++++++-
llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll | 30 ++-
.../CodeGen/AMDGPU/flat-atomic-fadd.f64.ll | 12 +-
.../AMDGPU/illegal-sgpr-to-vgpr-copy.ll | 92 ++++++---
...ev503538-move-to-valu-stack-srd-physreg.ll | 69 ++++++-
.../AMDGPU/tail-call-inreg-arguments.error.ll | 128 ++++++------
.../AMDGPU/write-register-vgpr-into-sgpr.ll | 13 +-
8 files changed, 457 insertions(+), 122 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 4342e7a369c13..1a29254c27a10 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -899,6 +899,55 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
MI, MI.getDebugLoc())) {
I = std::next(I);
MI.eraseFromParent();
+ } else {
+ // At this point, if we still have a VGPR to SGPR copy, it is completely
+ // illegal. We assume it was intentionally introduced and should be
+ // replaced with a V_READFIRSTLANE_B32 to ensure correctness.
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = TRI->getPhysRegBaseClass(DstReg);
+ ArrayRef<int16_t> SrcIndices = TRI->getRegSplitParts(SRC, 4);
+ ArrayRef<int16_t> DstIndices = TRI->getRegSplitParts(DRC, 4);
+ assert(SrcIndices.size() == DstIndices.size() &&
+ "Register triples should match");
+ MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+ // If SrcReg is a virtual register, can we get the sub-register? One way to
+ // handle this: if the def is a COPY, fold the def's source. This would not
+ // work when the def's source is an AGPR, since a direct copy from an AGPR
+ // to an SGPR is not allowed.
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ // Get the def SrcReg
+ Register DefSrcReg = DefMI->getOperand(1).getReg();
+ // If this is not a register tuple, replace the opcode itself.
+ if (SrcIndices.size() == 1) {
+ MI.setDesc(TII->get(AMDGPU::V_READFIRSTLANE_B32));
+ MI.addOperand(*MI.getParent()->getParent(),
+ MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ return true;
+ }
+
+ if (!DefMI || !DefMI->isCopy() || TRI->isAGPR(*MRI, DefSrcReg))
+ return true;
+
+ for (unsigned Idx = 0; Idx < SrcIndices.size(); ++Idx) {
+ int16_t SubIdx = SrcIndices[Idx];
+ Register DefSrcSubReg = TRI->getSubReg(DefSrcReg, SubIdx);
+ Register DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+ assert(DstSubReg && DefSrcSubReg && "Failed to find subregs!");
+ LastMI = BuildMI(*MI.getParent(), I, MI.getDebugLoc(),
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), DstSubReg)
+ .addReg(DefSrcSubReg)
+ .addReg(DefSrcReg, RegState::Implicit);
+ if (!FirstMI)
+ FirstMI = LastMI;
+ }
+ assert(FirstMI && LastMI);
+
+ FirstMI->addOperand(
+ MachineOperand::CreateReg(DstReg, true /*IsDef*/, true /*IsImp*/));
+
+ LastMI->addRegisterKilled(DefSrcReg, TRI);
+ I = std::next(I);
+ MI.eraseFromParent();
+ DefMI->eraseFromParent();
}
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll
index 34f4476f7fd6a..f7b44c0284886 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll
@@ -1,22 +1,202 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -enable-var-scope %s
-
-; CHECK: illegal VGPR to SGPR copy
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN
declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0
declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0
declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0
define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 {
+ ; GCN-LABEL: name: test_call_external_void_func_a15i32_inreg
+ ; GCN: bb.0 (%ir-block.0):
+ ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr31
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr29
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr28
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr27
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr26
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr25
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr24
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr23
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr22
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr21
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr20
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr19
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr18
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr17
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a15i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a15i32_inreg, implicit-def dead $scc
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY23]]
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY22]]
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY21]]
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY20]]
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY19]]
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY18]]
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]]
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY16]]
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY]]
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY24]]
+ ; GCN-NEXT: $sgpr0 = COPY [[COPY15]]
+ ; GCN-NEXT: $sgpr1 = COPY [[COPY14]]
+ ; GCN-NEXT: $sgpr2 = COPY [[COPY13]]
+ ; GCN-NEXT: $sgpr3 = COPY [[COPY12]]
+ ; GCN-NEXT: $sgpr16 = COPY [[COPY11]]
+ ; GCN-NEXT: $sgpr17 = COPY [[COPY10]]
+ ; GCN-NEXT: $sgpr18 = COPY [[COPY9]]
+ ; GCN-NEXT: $sgpr19 = COPY [[COPY8]]
+ ; GCN-NEXT: $sgpr20 = COPY [[COPY7]]
+ ; GCN-NEXT: $sgpr21 = COPY [[COPY6]]
+ ; GCN-NEXT: $sgpr22 = COPY [[COPY5]]
+ ; GCN-NEXT: $sgpr23 = COPY [[COPY4]]
+ ; GCN-NEXT: $sgpr24 = COPY [[COPY3]]
+ ; GCN-NEXT: $sgpr25 = COPY [[COPY2]]
+ ; GCN-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a15i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26
+ ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: SI_RETURN
call void @external_void_func_a15i32_inreg([15 x i32] inreg %arg0)
ret void
}
define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 {
+ ; GCN-LABEL: name: test_call_external_void_func_a16i32_inreg
+ ; GCN: bb.0 (%ir-block.0):
+ ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr1, $vgpr31
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr29
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr28
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr27
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr26
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr25
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr24
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr21
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr20
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr19
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr18
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr17
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a16i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a16i32_inreg, implicit-def dead $scc
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY24]]
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY23]]
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY22]]
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY21]]
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY20]]
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY19]]
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY18]]
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY17]]
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY]]
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY25]]
+ ; GCN-NEXT: $sgpr0 = COPY [[COPY16]]
+ ; GCN-NEXT: $sgpr1 = COPY [[COPY15]]
+ ; GCN-NEXT: $sgpr2 = COPY [[COPY14]]
+ ; GCN-NEXT: $sgpr3 = COPY [[COPY13]]
+ ; GCN-NEXT: $sgpr16 = COPY [[COPY12]]
+ ; GCN-NEXT: $sgpr17 = COPY [[COPY11]]
+ ; GCN-NEXT: $sgpr18 = COPY [[COPY10]]
+ ; GCN-NEXT: $sgpr19 = COPY [[COPY9]]
+ ; GCN-NEXT: $sgpr20 = COPY [[COPY8]]
+ ; GCN-NEXT: $sgpr21 = COPY [[COPY7]]
+ ; GCN-NEXT: $sgpr22 = COPY [[COPY6]]
+ ; GCN-NEXT: $sgpr23 = COPY [[COPY5]]
+ ; GCN-NEXT: $sgpr24 = COPY [[COPY4]]
+ ; GCN-NEXT: $sgpr25 = COPY [[COPY3]]
+ ; GCN-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GCN-NEXT: $sgpr27 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a16i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27
+ ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: SI_RETURN
call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0)
ret void
}
define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 {
+ ; GCN-LABEL: name: test_call_external_void_func_a15i32_inreg_i32_inreg
+ ; GCN: bb.0 (%ir-block.0):
+ ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr1, $vgpr31
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr29
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr28
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr27
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr26
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr25
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr24
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr21
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr20
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr19
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr18
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr17
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a15i32_inreg_i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a15i32_inreg_i32_inreg, implicit-def dead $scc
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY24]]
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY23]]
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY22]]
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY21]]
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY20]]
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY19]]
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY18]]
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY17]]
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY]]
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY25]]
+ ; GCN-NEXT: $sgpr0 = COPY [[COPY16]]
+ ; GCN-NEXT: $sgpr1 = COPY [[COPY15]]
+ ; GCN-NEXT: $sgpr2 = COPY [[COPY14]]
+ ; GCN-NEXT: $sgpr3 = COPY [[COPY13]]
+ ; GCN-NEXT: $sgpr16 = COPY [[COPY12]]
+ ; GCN-NEXT: $sgpr17 = COPY [[COPY11]]
+ ; GCN-NEXT: $sgpr18 = COPY [[COPY10]]
+ ; GCN-NEXT: $sgpr19 = COPY [[COPY9]]
+ ; GCN-NEXT: $sgpr20 = COPY [[COPY8]]
+ ; GCN-NEXT: $sgpr21 = COPY [[COPY7]]
+ ; GCN-NEXT: $sgpr22 = COPY [[COPY6]]
+ ; GCN-NEXT: $sgpr23 = COPY [[COPY5]]
+ ; GCN-NEXT: $sgpr24 = COPY [[COPY4]]
+ ; GCN-NEXT: $sgpr25 = COPY [[COPY3]]
+ ; GCN-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GCN-NEXT: $sgpr27 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a15i32_inreg_i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27
+ ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: SI_RETURN
call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
index 6b14a660f580d..89ead929799e0 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
@@ -1,25 +1,21 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; XFAIL: *
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN
define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
+ ; GCN-LABEL: name: s_copysign_f32_bf16
+ ; GCN: bb.0 (%ir-block.0):
+ ; GCN-NEXT: liveins: $sgpr0, $sgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GCN-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
+ ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[V_BFI_B32_e64_]], implicit $exec
+ ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0
%sign = fpext bfloat %sign.bf16 to float
%op = call float @llvm.copysign.f32(float %mag, float %sign)
%cast = bitcast float %op to i32
ret i32 %cast
}
-; define i32 @s_copysign_f32_bf16(float %mag, bfloat %sign.bf16) {
-; %sign = fpext bfloat %sign.bf16 to float
-; %op = call float @llvm.copysign.f32(float %mag, float %sign)
-; %cast = bitcast float %op to i32
-; ret i32 %cast
-; }
-
-; define i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
-; %sign = fpext bfloat %sign.bf16 to float
-; %op = call float @llvm.copysign.f32(float %mag, float %sign)
-; %cast = bitcast float %op to i32
-; %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast)
-; ret i32 %readlane
-; }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
index 36714b386e7e5..60b772496cc34 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
@@ -45,8 +45,8 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %da
; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr)
; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]]
+ ; GFX90A_GFX942-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
+ ; GFX90A_GFX942-NEXT: $sgpr1 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data)
ret double %ret
@@ -118,8 +118,8 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]]
+ ; GFX90A_GFX942-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
+ ; GFX90A_GFX942-NEXT: $sgpr1 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
@@ -145,8 +145,8 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw__noprivate(ptr %ptr,
; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]]
+ ; GFX90A_GFX942-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
+ ; GFX90A_GFX942-NEXT: $sgpr1 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret double %ret
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
index 597f90c0f4e84..c1c77d8ed65ec 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
@@ -1,65 +1,105 @@
-; RUN: not llc -mtriple=amdgcn -verify-machineinstrs=0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
-; RUN: not llc -mtriple=amdgcn -verify-machineinstrs=0 < %s 2>&1 | FileCheck -check-prefix=GCN %s
-
-; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_i32 void (): illegal VGPR to SGPR copy
-; GCN: ; illegal copy v1 to s9
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 {
+ ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_i32
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr9
+ ; CHECK-NEXT: S_ENDPGM 0
%vgpr = call i32 asm sideeffect "; def $0", "=${v1}"()
call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr)
ret void
}
-; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal VGPR to SGPR copy
-; GCN: ; illegal copy v[0:1] to s[10:11]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 {
+ ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v2i32
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1
+ ; CHECK-NEXT: $sgpr10 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1, implicit-def $sgpr10_sgpr11
+ ; CHECK-NEXT: $sgpr11 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
+ ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr10_sgpr11
+ ; CHECK-NEXT: S_ENDPGM 0
%vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"()
call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr)
ret void
}
-; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal VGPR to SGPR copy
-; GCN: ; illegal copy v[0:3] to s[8:11]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 {
+ ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v4i32
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: $sgpr8 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: $sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: $sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: S_ENDPGM 0
%vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"()
call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr)
ret void
}
-; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal VGPR to SGPR copy
-; GCN: ; illegal copy v[0:7] to s[8:15]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 {
+ ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v8i32
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: $sgpr8 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: $sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: $sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: $sgpr12 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: $sgpr13 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: $sgpr14 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: $sgpr15 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; CHECK-NEXT: S_ENDPGM 0
%vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"()
call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr)
ret void
}
-; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal VGPR to SGPR copy
-; GCN: ; illegal copy v[0:15] to s[16:31]
define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 {
+ ; CHECK-LABEL: name: illegal_vgpr_to_sgpr_copy_v16i32
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr16 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; CHECK-NEXT: $sgpr17 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr18 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr19 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr20 = V_READFIRSTLANE_B32 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr21 = V_READFIRSTLANE_B32 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr22 = V_READFIRSTLANE_B32 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr23 = V_READFIRSTLANE_B32 $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr24 = V_READFIRSTLANE_B32 $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr25 = V_READFIRSTLANE_B32 $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr26 = V_READFIRSTLANE_B32 $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr27 = V_READFIRSTLANE_B32 $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr28 = V_READFIRSTLANE_B32 $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr29 = V_READFIRSTLANE_B32 $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr30 = V_READFIRSTLANE_B32 $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: $sgpr31 = V_READFIRSTLANE_B32 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+ ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; CHECK-NEXT: S_ENDPGM 0
%vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"()
call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr)
ret void
}
-; ERR: error: <unknown>:0:0: in function illegal_agpr_to_sgpr_copy_i32 void (): illegal VGPR to SGPR copy
-; GCN: v_accvgpr_read_b32 [[COPY1:v[0-9]+]], a1
-; GCN: ; illegal copy [[COPY1]] to s9
define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_i32() #1 {
+ ; CHECK-LABEL: name: illegal_agpr_to_sgpr_copy_i32
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $agpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $agpr1
+ ; CHECK-NEXT: $sgpr9 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+ ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $sgpr9
+ ; CHECK-NEXT: S_ENDPGM 0
%agpr = call i32 asm sideeffect "; def $0", "=${a1}"()
call void asm sideeffect "; use $0", "${s9}"(i32 %agpr)
ret void
}
-; ERR: error: <unknown>:0:0: in function illegal_agpr_to_sgpr_copy_v2i32 void (): illegal VGPR to SGPR copy
-; GCN-DAG: v_accvgpr_read_b32 v[[COPY1L:[0-9]+]], a0
-; GCN-DAG: v_accvgpr_read_b32 v[[COPY1H:[0-9]+]], a1
-; GCN: ; illegal copy v[[[COPY1L]]:[[COPY1H]]] to s[10:11]
-define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_v2i32() #1 {
- %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${a[0:1]}"()
- call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr)
- ret void
-}
-
attributes #0 = { nounwind }
attributes #1 = { nounwind "target-cpu"="gfx908" }
diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
index f0b3d334af67d..ad13390e3285b 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
@@ -1,14 +1,73 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs=0 -O0 2> %t.err < %s | FileCheck %s
-; RUN: FileCheck -check-prefix=ERR %s < %t.err
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -O0 -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN
; FIXME: This error will be fixed by supporting arbitrary divergent
; dynamic allocas by performing a wave umax of the size.
-; ERR: error: <unknown>:0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy
-
-; CHECK: ; illegal copy v0 to s32
define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) {
+ ; GCN-LABEL: name: move_to_valu_assert_srd_is_physreg_swdev503538
+ ; GCN: bb.0.entry:
+ ; GCN-NEXT: successors: %bb.4(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr0, $vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $sgpr32
+ ; GCN-NEXT: $sgpr32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.4:
+ ; GCN-NEXT: successors: %bb.5(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY2]], implicit $exec
+ ; GCN-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.5:
+ ; GCN-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (load (s32) from %ir.alloca, align 8, addrspace 5)
+ ; GCN-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GCN-NEXT: SI_WATERFALL_LOOP %bb.4, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.6:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1.loadstoreloop:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.6, %3, %bb.1
+ ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY3]], [[PHI]], 0, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: BUFFER_STORE_BYTE_OFFEN killed [[V_MOV_B32_e32_]], killed [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.1, addrspace 5)
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PHI]], killed [[S_MOV_B32_1]], implicit-def dead $scc
+ ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
+ ; GCN-NEXT: S_CMP_LT_U32 [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2.Flow:
+ ; GCN-NEXT: successors: %bb.3(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_BRANCH %bb.3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3.split:
+ ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0
entry:
%idx = load i32, ptr addrspace(1) %ptr, align 4
%zero = extractelement <4 x i32> zeroinitializer, i32 %idx
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll
index 242b5e9aeaf42..96d590108fb71 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll
@@ -1,41 +1,42 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 2> %t.err < %s | FileCheck %s
-; RUN: FileCheck -check-prefix=ERR %s < %t.err
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN
+
; FIXME: These tests cannot be tail called, and should be executed in a waterfall loop.
declare hidden void @void_func_i32_inreg(i32 inreg)
-; ERR: error: <unknown>:0:0: in function tail_call_i32_inreg_divergent void (i32): illegal VGPR to SGPR copy
-; ERR: error: <unknown>:0:0: in function indirect_tail_call_i32_inreg_divergent void (i32): illegal VGPR to SGPR copy
-
define void @tail_call_i32_inreg_divergent(i32 %vgpr) {
-; CHECK-LABEL: tail_call_i32_inreg_divergent:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s16, s33
-; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[18:19]
-; CHECK-NEXT: v_writelane_b32 v40, s16, 2
-; CHECK-NEXT: s_addk_i32 s32, 0x400
-; CHECK-NEXT: v_writelane_b32 v40, s30, 0
-; CHECK-NEXT: v_writelane_b32 v40, s31, 1
-; CHECK-NEXT: s_getpc_b64 s[16:17]
-; CHECK-NEXT: s_add_u32 s16, s16, void_func_i32_inreg at rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s17, s17, void_func_i32_inreg at rel32@hi+12
-; CHECK-NEXT: ; illegal copy v0 to s0
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_readlane_b32 s31, v40, 1
-; CHECK-NEXT: v_readlane_b32 s30, v40, 0
-; CHECK-NEXT: s_mov_b32 s32, s33
-; CHECK-NEXT: v_readlane_b32 s4, v40, 2
-; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s33, s4
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+ ; GCN-LABEL: name: tail_call_i32_inreg_divergent
+ ; GCN: bb.0 (%ir-block.0):
+ ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @void_func_i32_inreg, target-flags(amdgpu-rel32-hi) @void_func_i32_inreg, implicit-def dead $scc
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]]
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]]
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]]
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY6]]
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY5]]
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY4]]
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY3]]
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY2]]
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY]]
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]]
+ ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @void_func_i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0
+ ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: SI_RETURN
tail call void @void_func_i32_inreg(i32 inreg %vgpr)
ret void
}
@@ -43,35 +44,38 @@ define void @tail_call_i32_inreg_divergent(i32 %vgpr) {
@constant = external hidden addrspace(4) constant ptr
define void @indirect_tail_call_i32_inreg_divergent(i32 %vgpr) {
-; CHECK-LABEL: indirect_tail_call_i32_inreg_divergent:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s16, s33
-; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
-; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[18:19]
-; CHECK-NEXT: s_addk_i32 s32, 0x400
-; CHECK-NEXT: v_writelane_b32 v40, s16, 2
-; CHECK-NEXT: s_getpc_b64 s[16:17]
-; CHECK-NEXT: s_add_u32 s16, s16, constant at rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s17, s17, constant at rel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
-; CHECK-NEXT: v_writelane_b32 v40, s30, 0
-; CHECK-NEXT: v_writelane_b32 v40, s31, 1
-; CHECK-NEXT: ; illegal copy v0 to s0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: v_readlane_b32 s31, v40, 1
-; CHECK-NEXT: v_readlane_b32 s30, v40, 0
-; CHECK-NEXT: s_mov_b32 s32, s33
-; CHECK-NEXT: v_readlane_b32 s4, v40, 2
-; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
-; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_mov_b32 s33, s4
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_setpc_b64 s[30:31]
+ ; GCN-LABEL: name: indirect_tail_call_i32_inreg_divergent
+ ; GCN: bb.0 (%ir-block.0):
+ ; GCN-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @constant, target-flags(amdgpu-rel32-hi) @constant, implicit-def dead $scc
+ ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from @constant, addrspace 4)
+ ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]]
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]]
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]]
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY6]]
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY5]]
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY4]]
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY3]]
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY2]]
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY]]
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]]
+ ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0
+ ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GCN-NEXT: SI_RETURN
%fptr = load ptr, ptr addrspace(4) @constant, align 8
tail call void %fptr(i32 inreg %vgpr)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
index de3b1d5bf78b3..4e03c6070314f 100644
--- a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
@@ -1,6 +1,5 @@
-; XFAIL: *
-; REQUIRES: asserts
-; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN
; write_register doesn't prevent us from illegally trying to write a
; vgpr value into a scalar register, but I don't think there's much we
@@ -11,6 +10,14 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.wave.barrier() #2
define amdgpu_kernel void @write_vgpr_into_sgpr() {
+ ; GCN-LABEL: name: write_vgpr_into_sgpr
+ ; GCN: bb.0 (%ir-block.0):
+ ; GCN-NEXT: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; GCN-NEXT: $exec_lo = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
+ ; GCN-NEXT: WAVE_BARRIER
+ ; GCN-NEXT: S_ENDPGM 0
%tid = call i32 @llvm.amdgcn.workitem.id.x()
call void @llvm.write_register.i32(metadata !0, i32 %tid)
call void @llvm.amdgcn.wave.barrier() #2
>From fc9de334cac1840fd70139bb30121716bb49b7fd Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Wed, 26 Mar 2025 16:56:35 +0530
Subject: [PATCH 3/3] add issue 130443 test
---
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 4 +-
llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll | 25 ++++---
llvm/test/CodeGen/AMDGPU/issue130443.ll | 70 ++++++++++++++++++++
3 files changed, 84 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/issue130443.ll
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 1a29254c27a10..2601a3b60e658 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -908,7 +908,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
ArrayRef<int16_t> SrcIndices = TRI->getRegSplitParts(SRC, 4);
ArrayRef<int16_t> DstIndices = TRI->getRegSplitParts(DRC, 4);
assert(SrcIndices.size() == DstIndices.size() &&
- "Register triples should match");
+ "Register tuples should match");
MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
// If SrcReg is virtual register can we get the sub reg? one way to handle
// this If def is copy fold the def src. This should not work in case of
@@ -916,7 +916,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
// Get the def SrcReg
Register DefSrcReg = DefMI->getOperand(1).getReg();
- // If not a register triple replace the opcode itself.
+ // If not a register tuple, replace the opcode itself.
if (SrcIndices.size() == 1) {
MI.setDesc(TII->get(AMDGPU::V_READFIRSTLANE_B32));
MI.addOperand(*MI.getParent()->getParent(),
diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
index 89ead929799e0..e69cea873f2a2 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
@@ -1,21 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GFX11
define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
- ; GCN-LABEL: name: s_copysign_f32_bf16
- ; GCN: bb.0 (%ir-block.0):
- ; GCN-NEXT: liveins: $sgpr0, $sgpr1
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
- ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
- ; GCN-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
- ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[V_BFI_B32_e64_]], implicit $exec
- ; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0
+ ; GFX11-LABEL: name: s_copysign_f32_bf16
+ ; GFX11: bb.0 (%ir-block.0):
+ ; GFX11-NEXT: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX11-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
+ ; GFX11-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[V_BFI_B32_e64_]], implicit $exec
+ ; GFX11-NEXT: SI_RETURN_TO_EPILOG $sgpr0
%sign = fpext bfloat %sign.bf16 to float
%op = call float @llvm.copysign.f32(float %mag, float %sign)
%cast = bitcast float %op to i32
ret i32 %cast
}
-
diff --git a/llvm/test/CodeGen/AMDGPU/issue130443.ll b/llvm/test/CodeGen/AMDGPU/issue130443.ll
new file mode 100644
index 0000000000000..19357986272cf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue130443.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GFX9
+
+declare hidden void @external_void_func_a15i32_inreg([16 x i32] inreg)
+
+define void @test_call_external_void_func_a15i32_inreg([16 x i32] inreg %arg0) {
+ ; GFX9-LABEL: name: test_call_external_void_func_a15i32_inreg
+ ; GFX9: bb.0 (%ir-block.0):
+ ; GFX9-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $vgpr0, $vgpr1, $vgpr31
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr29
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr28
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr27
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr26
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr25
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr24
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr21
+ ; GFX9-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr20
+ ; GFX9-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr19
+ ; GFX9-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr18
+ ; GFX9-NEXT: [[COPY15:%[0-9]+]]:sgpr_32 = COPY $sgpr17
+ ; GFX9-NEXT: [[COPY16:%[0-9]+]]:sgpr_32 = COPY $sgpr16
+ ; GFX9-NEXT: [[COPY17:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GFX9-NEXT: [[COPY18:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX9-NEXT: [[COPY19:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX9-NEXT: [[COPY20:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX9-NEXT: [[COPY21:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GFX9-NEXT: [[COPY22:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX9-NEXT: [[COPY23:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX9-NEXT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX9-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GFX9-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @external_void_func_a15i32_inreg, target-flags(amdgpu-rel32-hi) @external_void_func_a15i32_inreg, implicit-def dead $scc
+ ; GFX9-NEXT: [[COPY25:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX9-NEXT: $sgpr4_sgpr5 = COPY [[COPY24]]
+ ; GFX9-NEXT: $sgpr6_sgpr7 = COPY [[COPY23]]
+ ; GFX9-NEXT: $sgpr8_sgpr9 = COPY [[COPY22]]
+ ; GFX9-NEXT: $sgpr10_sgpr11 = COPY [[COPY21]]
+ ; GFX9-NEXT: $sgpr12 = COPY [[COPY20]]
+ ; GFX9-NEXT: $sgpr13 = COPY [[COPY19]]
+ ; GFX9-NEXT: $sgpr14 = COPY [[COPY18]]
+ ; GFX9-NEXT: $sgpr15 = COPY [[COPY17]]
+ ; GFX9-NEXT: $vgpr31 = COPY [[COPY]]
+ ; GFX9-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY25]]
+ ; GFX9-NEXT: $sgpr0 = COPY [[COPY16]]
+ ; GFX9-NEXT: $sgpr1 = COPY [[COPY15]]
+ ; GFX9-NEXT: $sgpr2 = COPY [[COPY14]]
+ ; GFX9-NEXT: $sgpr3 = COPY [[COPY13]]
+ ; GFX9-NEXT: $sgpr16 = COPY [[COPY12]]
+ ; GFX9-NEXT: $sgpr17 = COPY [[COPY11]]
+ ; GFX9-NEXT: $sgpr18 = COPY [[COPY10]]
+ ; GFX9-NEXT: $sgpr19 = COPY [[COPY9]]
+ ; GFX9-NEXT: $sgpr20 = COPY [[COPY8]]
+ ; GFX9-NEXT: $sgpr21 = COPY [[COPY7]]
+ ; GFX9-NEXT: $sgpr22 = COPY [[COPY6]]
+ ; GFX9-NEXT: $sgpr23 = COPY [[COPY5]]
+ ; GFX9-NEXT: $sgpr24 = COPY [[COPY4]]
+ ; GFX9-NEXT: $sgpr25 = COPY [[COPY3]]
+ ; GFX9-NEXT: $sgpr26 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
+ ; GFX9-NEXT: $sgpr27 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
+ ; GFX9-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[SI_PC_ADD_REL_OFFSET]], @external_void_func_a15i32_inreg, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27
+ ; GFX9-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; GFX9-NEXT: SI_RETURN
+ call void @external_void_func_a15i32_inreg([16 x i32] inreg %arg0)
+ ret void
+}
More information about the llvm-commits
mailing list