[llvm] [GlobalISel]: G_UNMERGE_VALUES for vectors with different element sizes (PR #133335)
Robert Imschweiler via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 27 16:01:45 PDT 2025
https://github.com/ro-i created https://github.com/llvm/llvm-project/pull/133335
This commit adds support for using different source and destination vector element sizes for G_UNMERGE_VALUES, e.g.:
`%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)`
This LLVM defect was identified via the AMD Fuzzing project.
From 604988c69b59c61bf35526d945f8a7fa8e9af434 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 27 Mar 2025 17:54:38 -0500
Subject: [PATCH] [GlobalISel]: G_UNMERGE_VALUES for vectors with different
element sizes
This commit adds support for using different source and destination
vector element sizes for G_UNMERGE_VALUES, e.g.:
`%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)`
This LLVM defect was identified via the AMD Fuzzing project.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 32 ++--
llvm/lib/CodeGen/MachineVerifier.cpp | 10 +-
.../AMDGPU/GlobalISel/insertelement.ll | 55 +++++++
.../GlobalISel/legalize-unmerge-values.mir | 155 ++++++++++++++++++
4 files changed, 236 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a9f80860124fb..4fcad22587f66 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8281,9 +8281,8 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
const unsigned NumDst = MI.getNumOperands() - 1;
Register SrcReg = MI.getOperand(NumDst).getReg();
- Register Dst0Reg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(Dst0Reg);
- if (DstTy.isPointer())
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (DstTy.getScalarType().isPointer())
return UnableToLegalize; // TODO
SrcReg = coerceToScalar(SrcReg);
@@ -8293,14 +8292,25 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
// Expand scalarizing unmerge as bitcast to integer and shift.
LLT IntTy = MRI.getType(SrcReg);
- MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
-
- const unsigned DstSize = DstTy.getSizeInBits();
- unsigned Offset = DstSize;
- for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
- auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
- auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
- MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
+ const unsigned DstSize = DstTy.getScalarSizeInBits();
+ SmallVector<Register> VectorElems;
+ Register Shift;
+ for (unsigned I = 0, Offset = 0; I != NumDst; Offset += DstSize) {
+ if (Offset) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+ Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt).getReg(0);
+ } else {
+ Shift = SrcReg;
+ }
+ if (DstTy.isVector()) {
+      VectorElems.emplace_back(
+          MIRBuilder.buildTrunc(DstTy.getScalarType(), Shift).getReg(0));
+ if (VectorElems.size() == DstTy.getNumElements()) {
+ MIRBuilder.buildBuildVector(MI.getOperand(I++), VectorElems);
+ VectorElems.clear();
+ }
+ } else {
+ MIRBuilder.buildTrunc(MI.getOperand(I++), Shift);
+ }
}
MI.eraseFromParent();
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index a7dbceb88c4c8..7cbf41038f6e4 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1510,11 +1510,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg());
if (DstTy.isVector()) {
- // This case is the converse of G_CONCAT_VECTORS.
- if (!SrcTy.isVector() ||
- (SrcTy.getScalarType() != DstTy.getScalarType() &&
- !SrcTy.isPointerVector()) ||
- SrcTy.isScalableVector() != DstTy.isScalableVector() ||
+ // This case is the converse of G_CONCAT_VECTORS, but relaxed since
+ // G_UNMERGE_VALUES can handle src and dst vectors with different
+ // element sizes:
+ // %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
+ if (SrcTy.isScalableVector() != DstTy.isScalableVector() ||
SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits())
report("G_UNMERGE_VALUES source operand does not match vector "
"destination operands",
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index 5eca04c02a9f9..96889f7a957b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -6508,3 +6508,58 @@ entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert
}
+
+; Found by fuzzer, reduced with llvm-reduce.
+define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
+; GPRIDX-LABEL: insert_very_small_from_very_large:
+; GPRIDX: ; %bb.0: ; %bb
+; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
+; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
+; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
+; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1
+; GPRIDX-NEXT: s_and_b32 s2, s2, 1
+; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT: flat_store_byte v[0:1], v2
+; GPRIDX-NEXT: s_endpgm
+;
+; GFX10-LABEL: insert_very_small_from_very_large:
+; GFX10: ; %bb.0: ; %bb
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_lshr_b32 s2, s12, 1
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: s_and_b32 s2, s2, 1
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: s_lshl_b32 s2, s2, 1
+; GFX10-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-NEXT: flat_store_byte v[0:1], v2
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: insert_very_small_from_very_large:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_lshr_b32 s2, s8, 1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_and_b32 s2, s2, 1
+; GFX11-NEXT: v_mov_b32_e32 v1, s1
+; GFX11-NEXT: s_lshl_b32 s2, s2, 1
+; GFX11-NEXT: v_mov_b32_e32 v2, s2
+; GFX11-NEXT: flat_store_b8 v[0:1], v2
+; GFX11-NEXT: s_endpgm
+bb:
+ %0 = bitcast <32 x i16> %L3 to i512
+ %1 = trunc i512 %0 to i8
+ %2 = trunc i8 %1 to i2
+ %3 = bitcast i2 %2 to <2 x i1>
+ %I = insertelement <2 x i1> %3, i1 false, i32 0
+ store <2 x i1> %I, ptr %ptr, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
index c231aa8334d45..3500df7c99b6e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
@@ -96,6 +96,41 @@ body: |
$vgpr1 = COPY %4
...
+---
+name: test_unmerge_v2s8_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: test_unmerge_v2s8_v2s16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
+ %3:_(<2 x s16>) = G_ANYEXT %1
+ %4:_(<2 x s16>) = G_ANYEXT %2
+ $vgpr0 = COPY %3
+ $vgpr1 = COPY %4
+...
+
---
name: test_unmerge_s16_v3s16
body: |
@@ -120,6 +155,50 @@ body: |
$vgpr2 = COPY %6
...
+---
+name: test_unmerge_v2s8_v3s16
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_unmerge_v2s8_v3s16
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C3]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
+ %0:_(<3 x s16>) = G_IMPLICIT_DEF
+ %1:_(<2 x s8>), %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
+ %4:_(<2 x s16>) = G_ANYEXT %1
+ %5:_(<2 x s16>) = G_ANYEXT %2
+ %6:_(<2 x s16>) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+ $vgpr1 = COPY %5
+ $vgpr2 = COPY %6
+...
+
---
name: test_unmerge_s16_v4s16
@@ -191,6 +270,62 @@ body: |
$vgpr5 = COPY %12
...
+---
+name: test_unmerge_v4s8_v6s16
+body: |
+ bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+  ; CHECK-LABEL: name: test_unmerge_v4s8_v6s16
+  ; CHECK: liveins: $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s8)
+ ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s8)
+ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s8)
+ ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32)
+ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8), [[UV32:%[0-9]+]]:_(s8), [[UV33:%[0-9]+]]:_(s8), [[UV34:%[0-9]+]]:_(s8), [[UV35:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV32]](s8)
+ ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV33]](s8)
+ ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV34]](s8)
+ ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV35]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR2]](<4 x s32>)
+ %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<4 x s8>), %2:_(<4 x s8>), %3:_(<4 x s8>) = G_UNMERGE_VALUES %0
+ %4:_(<4 x s32>) = G_ANYEXT %1
+ %5:_(<4 x s32>) = G_ANYEXT %2
+ %6:_(<4 x s32>) = G_ANYEXT %3
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+ $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5
+ $vgpr8_vgpr9_vgpr10_vgpr11 = COPY %6
+...
+
+---
+name: test_unmerge_v3s32_v6s16
+body: |
+ bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+  ; CHECK-LABEL: name: test_unmerge_v3s32_v6s16
+  ; CHECK: liveins: $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+ %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<3 x s32>) = G_UNMERGE_VALUES %0
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
---
name: test_unmerge_s8_s16
@@ -1090,3 +1225,23 @@ body: |
$vgpr9_vgpr10_vgpr11 = COPY %8
...
+
+---
+name: test_unmerge_v3s32_v12s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_unmerge_v3s32_v12s16
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+ ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>)
+ %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ %1:_(<3 x s32>), %2:_(<3 x s32>) = G_UNMERGE_VALUES %0
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+ $vgpr3_vgpr4_vgpr5 = COPY %2
+
+...
More information about the llvm-commits
mailing list