[llvm] 5508973 - [AMDGPU][GlobalISel] Add readanylane combines for merge-like instructions (#172546)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 18 08:04:10 PST 2025
Author: vangthao95
Date: 2025-12-18T08:04:06-08:00
New Revision: 55089733b6b3ac70d4808bff734946288f0b6251
URL: https://github.com/llvm/llvm-project/commit/55089733b6b3ac70d4808bff734946288f0b6251
DIFF: https://github.com/llvm/llvm-project/commit/55089733b6b3ac70d4808bff734946288f0b6251.diff
LOG: [AMDGPU][GlobalISel] Add readanylane combines for merge-like instructions (#172546)
When a merge-like instruction has all readanylane sources and the result
is copied to VGPRs, eliminate the readanylanes by either using the
original unmerge source directly or building a new merge with the VGPR
sources.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 2b782684c348a..c77a4e79d70ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -128,8 +128,9 @@ class AMDGPURegBankLegalizeCombiner {
bool isLaneMask(Register Reg);
std::pair<MachineInstr *, Register> tryMatch(Register Src, unsigned Opcode);
- std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src);
- Register getReadAnyLaneSrc(Register Src);
+ Register tryMatchUnmergeDefs(SmallVectorImpl<Register> &DefRegs);
+ SmallVector<Register> tryMatchMergeReadAnyLane(GMergeLikeInstr *Merge);
+ SmallVector<Register> getReadAnyLaneSrcs(Register Src);
void replaceRegWithOrBuildCopy(Register Dst, Register Src);
bool tryEliminateReadAnyLane(MachineInstr &Copy);
@@ -154,31 +155,48 @@ AMDGPURegBankLegalizeCombiner::tryMatch(Register Src, unsigned Opcode) {
return {MatchMI, MatchMI->getOperand(1).getReg()};
}
-std::pair<GUnmerge *, int>
-AMDGPURegBankLegalizeCombiner::tryMatchRALFromUnmerge(Register Src) {
- MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
- if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
- return {nullptr, -1};
-
- Register RALSrc = ReadAnyLane->getOperand(1).getReg();
- if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
- return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+// Check if all registers are from same unmerge and there is no shuffling.
+// Returns the unmerge source if both conditions are met.
+Register AMDGPURegBankLegalizeCombiner::tryMatchUnmergeDefs(
+ SmallVectorImpl<Register> &DefRegs) {
+ auto *UnMerge = getOpcodeDef<GUnmerge>(DefRegs[0], MRI);
+ if (!UnMerge || UnMerge->getNumDefs() != DefRegs.size())
+ return {};
+ for (unsigned I = 1; I < DefRegs.size(); ++I) {
+ if (UnMerge->getReg(I) != DefRegs[I])
+ return {};
+ }
+ return UnMerge->getSourceReg();
+}
- return {nullptr, -1};
+// Check if all merge sources are readanylanes and return the readanylane
+// sources if they are.
+SmallVector<Register> AMDGPURegBankLegalizeCombiner::tryMatchMergeReadAnyLane(
+ GMergeLikeInstr *Merge) {
+ SmallVector<Register> ReadAnyLaneSrcs;
+ for (unsigned i = 0; i < Merge->getNumSources(); ++i) {
+ Register Src;
+ if (!mi_match(Merge->getSourceReg(i), MRI,
+ m_GAMDGPUReadAnyLane(m_Reg(Src))))
+ return {};
+ ReadAnyLaneSrcs.push_back(Src);
+ }
+ return ReadAnyLaneSrcs;
}
-Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
+SmallVector<Register>
+AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrcs(Register Src) {
// Src = G_AMDGPU_READANYLANE RALSrc
Register RALSrc;
if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
- return RALSrc;
+ return {RALSrc};
// RALSrc = G_ANYEXT S16Src
// TruncSrc = G_AMDGPU_READANYLANE RALSrc
// Src = G_TRUNC TruncSrc
if (mi_match(Src, MRI,
m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
- return RALSrc;
+ return {RALSrc};
}
// TruncSrc = G_AMDGPU_READANYLANE RALSrc
@@ -186,27 +204,25 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
// Src = G_ANYEXT AextSrc
if (mi_match(Src, MRI,
m_GAnyExt(m_GTrunc(m_GAMDGPUReadAnyLane(m_Reg(RALSrc)))))) {
- return RALSrc;
+ return {RALSrc};
}
- // LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
- // LoSgpr = G_AMDGPU_READANYLANE LoVgpr
- // HiSgpr = G_AMDGPU_READANYLANE HiVgpr
- // Src G_MERGE_VALUES LoSgpr, HiSgpr
- auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
- if (Merge) {
- unsigned NumElts = Merge->getNumSources();
- auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
- if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+ // Sgpr0 = G_AMDGPU_READANYLANE Vgpr0
+ // Sgpr1 = G_AMDGPU_READANYLANE Vgpr1
+ // ...
+ // Src = G_MERGE_LIKE Sgpr0, Sgpr1, ...
+ // Dst = COPY Src
+ if (auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI)) {
+ SmallVector<Register> ReadAnyLaneSrcs = tryMatchMergeReadAnyLane(Merge);
+ if (ReadAnyLaneSrcs.empty())
return {};
- // Check if all elements are from same unmerge and there is no shuffling.
- for (unsigned i = 1; i < NumElts; ++i) {
- auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
- if (UnmergeI != Unmerge || (unsigned)IdxI != i)
- return {};
- }
- return Unmerge->getSourceReg();
+ // Vgpr0, Vgpr1, ... = G_UNMERGE_VALUES UnmergeSrc
+ if (Register UnmergeSrc = tryMatchUnmergeDefs(ReadAnyLaneSrcs))
+ return {UnmergeSrc};
+
+ // Multiple ReadAnyLane vgpr sources, need to merge Vgpr0, Vgpr1, ...
+ return ReadAnyLaneSrcs;
}
// SrcRegIdx = G_AMDGPU_READANYLANE RALElSrc
@@ -217,7 +233,7 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
return {};
int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
- Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+ auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
if (!Merge || UnMerge->getNumDefs() != Merge->getNumSources())
return {};
@@ -227,7 +243,7 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
auto [RALEl, RALElSrc] = tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
if (RALEl)
- return RALElSrc;
+ return {RALElSrc};
return {};
}
@@ -259,17 +275,27 @@ bool AMDGPURegBankLegalizeCombiner::tryEliminateReadAnyLane(
if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
RALDst = SrcMI.getOperand(1).getReg();
- Register RALSrc = getReadAnyLaneSrc(RALDst);
- if (!RALSrc)
+ B.setInstrAndDebugLoc(Copy);
+ SmallVector<Register> ReadAnyLaneSrcRegs = getReadAnyLaneSrcs(RALDst);
+ if (ReadAnyLaneSrcRegs.empty())
return false;
- B.setInstr(Copy);
+ Register ReadAnyLaneSrc;
+ if (ReadAnyLaneSrcRegs.size() == 1) {
+ ReadAnyLaneSrc = ReadAnyLaneSrcRegs[0];
+ } else {
+ // Multiple readanylane sources without a common unmerge, merge them.
+ auto Merge = B.buildMergeLikeInstr({VgprRB, MRI.getType(RALDst)},
+ ReadAnyLaneSrcRegs);
+ ReadAnyLaneSrc = Merge.getReg(0);
+ }
+
if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
// Src = READANYLANE RALSrc Src = READANYLANE RALSrc
// Dst = Copy Src $Dst = Copy Src
// -> ->
// Dst = RALSrc $Dst = Copy RALSrc
- replaceRegWithOrBuildCopy(Dst, RALSrc);
+ replaceRegWithOrBuildCopy(Dst, ReadAnyLaneSrc);
} else {
// RALDst = READANYLANE RALSrc RALDst = READANYLANE RALSrc
// Src = G_BITCAST RALDst Src = G_BITCAST RALDst
@@ -277,7 +303,7 @@ bool AMDGPURegBankLegalizeCombiner::tryEliminateReadAnyLane(
// -> ->
// NewVgpr = G_BITCAST RALDst NewVgpr = G_BITCAST RALDst
// Dst = NewVgpr $Dst = Copy NewVgpr
- auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+ auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, ReadAnyLaneSrc);
replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll
index 39a793ce67bb9..63008f5a47752 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fabs.ll
@@ -250,11 +250,6 @@ define amdgpu_ps void @s_fabs_v2f32(<2 x float> inreg %in, ptr addrspace(1) %out
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e64 v2, 0x7fffffff, s0
; GFX11-NEXT: v_and_b32_e64 v3, 0x7fffffff, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: v_readfirstlane_b32 s1, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll
index ebc28cb005538..0e82a6e13ef18 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fneg.ll
@@ -250,11 +250,6 @@ define amdgpu_ps void @s_fneg_v2f32(<2 x float> inreg %in, ptr addrspace(1) %out
; GFX11: ; %bb.0:
; GFX11-NEXT: v_xor_b32_e64 v2, 0x80000000, s0
; GFX11-NEXT: v_xor_b32_e64 v3, 0x80000000, s1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: v_readfirstlane_b32 s1, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
index c035bd0ecfdec..bdc161184b27b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll
@@ -152,10 +152,6 @@ define amdgpu_ps void @op_readanylanes_merge_to_virtual_vgpr(ptr addrspace(1) in
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; CHECK-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: v_readfirstlane_b32 s1, v1
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: v_mov_b32_e32 v1, s1
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; CHECK-NEXT: s_endpgm
%load = load volatile <2 x float>, ptr addrspace(1) %ptr0
@@ -172,10 +168,6 @@ define amdgpu_ps void @op_readanylanes_merge_bitcast_to_virtual_vgpr(ptr addrspa
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; CHECK-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: v_readfirstlane_b32 s1, v1
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: v_mov_b32_e32 v1, s1
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; CHECK-NEXT: s_endpgm
%load = load volatile <2 x float>, ptr addrspace(1) %ptr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
index 64276baad4de1..c54df0bb000dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir
@@ -364,12 +364,9 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: G_STORE [[COPY4]](<2 x s32>), [[MV1]](p1) :: (store (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[MV1]](p1) :: (store (<2 x s32>), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
@@ -405,10 +402,8 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
%0:sgpr(s32) = COPY $sgpr0
@@ -445,13 +440,10 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[BITCAST]](s64)
- ; CHECK-NEXT: G_STORE [[COPY4]](s64), [[MV1]](p1) :: (store (s64), addrspace 1)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: G_STORE [[BITCAST]](s64), [[MV1]](p1) :: (store (s64), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
@@ -488,11 +480,9 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1)
; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; CHECK-NEXT: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG]]
; CHECK-NEXT: [[FNEG1:%[0-9]+]]:vgpr(s32) = G_FNEG [[UV1]]
- ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[FNEG1]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[FNEG]](s32), [[FNEG1]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
%0:sgpr(s32) = COPY $sgpr0
More information about the llvm-commits
mailing list