[llvm] 11e3dde - GlobalISel: Reimplement fewerElementsVectorBasic
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 18:20:12 PST 2020
Author: Matt Arsenault
Date: 2020-02-24T21:19:47-05:00
New Revision: 11e3dde6252f481238dccd14956350ff328c4087
URL: https://github.com/llvm/llvm-project/commit/11e3dde6252f481238dccd14956350ff328c4087
DIFF: https://github.com/llvm/llvm-project/commit/11e3dde6252f481238dccd14956350ff328c4087.diff
LOG: GlobalISel: Reimplement fewerElementsVectorBasic
Changes the handling of odd breakdowns and avoids using
G_EXTRACT/G_INSERT. Instead, pad the sources with undef to a wider,
evenly divisible size and unmerge. Also avoids introducing
instructions for the fully undef components.
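
As a concrete illustration, here is roughly what the new lowering
produces for a <3 x s32> G_AND narrowed to <2 x s32> — a hand-written
MIR sketch distilled from the legalize-and.mir test updated below;
register names are illustrative:

  %a:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
  %b:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
  ; Unmerge each source to the GCD type (s32), then pad with undef up
  ; to a whole number of <2 x s32> pieces.
  %a0:_(s32), %a1:_(s32), %a2:_(s32) = G_UNMERGE_VALUES %a(<3 x s32>)
  %undef:_(s32) = G_IMPLICIT_DEF
  %alo:_(<2 x s32>) = G_BUILD_VECTOR %a0(s32), %a1(s32)
  %ahi:_(<2 x s32>) = G_BUILD_VECTOR %a2(s32), %undef(s32)
  %b0:_(s32), %b1:_(s32), %b2:_(s32) = G_UNMERGE_VALUES %b(<3 x s32>)
  %blo:_(<2 x s32>) = G_BUILD_VECTOR %b0(s32), %b1(s32)
  %bhi:_(<2 x s32>) = G_BUILD_VECTOR %b2(s32), %undef(s32)
  ; Only the pieces covering real elements get an instruction.
  %r0:_(<2 x s32>) = G_AND %alo, %blo
  %r1:_(<2 x s32>) = G_AND %ahi, %bhi
  ; The fully undef tail piece stays an implicit_def; remerge through
  ; the wider LCM type and extract the original width.
  %pad:_(<2 x s32>) = G_IMPLICIT_DEF
  %wide:_(<6 x s32>) = G_CONCAT_VECTORS %r0(<2 x s32>), %r1(<2 x s32>), %pad(<2 x s32>)
  %dst:_(<3 x s32>) = G_EXTRACT %wide(<6 x s32>), 0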
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3af0705dff85..d8767d1c3068 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2565,90 +2565,62 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
+ assert(TypeIdx == 0 && "only one type index expected");
+
const unsigned Opc = MI.getOpcode();
- const unsigned NumOps = MI.getNumOperands() - 1;
- const unsigned NarrowSize = NarrowTy.getSizeInBits();
+ const int NumOps = MI.getNumOperands() - 1;
const Register DstReg = MI.getOperand(0).getReg();
const unsigned Flags = MI.getFlags();
- const LLT DstTy = MRI.getType(DstReg);
- const unsigned Size = DstTy.getSizeInBits();
- const int NumParts = Size / NarrowSize;
- const LLT EltTy = DstTy.getElementType();
- const unsigned EltSize = EltTy.getSizeInBits();
- const unsigned BitsForNumParts = NarrowSize * NumParts;
-
- // Check if we have any leftovers. If we do, then only handle the case where
- // the leftover is one element.
- if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
- return UnableToLegalize;
-
- if (BitsForNumParts != Size) {
- Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
- MIRBuilder.buildUndef(AccumDstReg);
-
- // Handle the pieces which evenly divide into the requested type with
- // extract/op/insert sequence.
- for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
- SmallVector<SrcOp, 4> SrcOps;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I), Offset);
- SrcOps.push_back(PartOpReg);
- }
-
- Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
- Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
- MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
- AccumDstReg = PartInsertReg;
- }
+ assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
- // Handle the remaining element sized leftover piece.
- SmallVector<SrcOp, 4> SrcOps;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
- MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I), BitsForNumParts);
- SrcOps.push_back(PartOpReg);
- }
+ LLT GCDTys[3];
+ LLT LCMTys[3];
+ SmallVector<Register, 8> ExtractedRegs[3];
+ SmallVector<Register, 8> Parts;
- Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
- MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
- MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
- MI.eraseFromParent();
+ // Break down all the sources into NarrowTy pieces we can operate on. This may
+ // involve creating merges to a wider type, padded with undef.
+ for (int I = 0; I != NumOps; ++I) {
+ Register SrcReg = MI.getOperand(I + 1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ GCDTys[I] = extractGCDType(ExtractedRegs[I], SrcTy, NarrowTy, SrcReg);
- return Legalized;
+ // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
+ LCMTys[I] = buildLCMMergePieces(SrcTy, NarrowTy, GCDTys[I],
+ ExtractedRegs[I], TargetOpcode::G_ANYEXT);
}
- SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
-
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
+ SmallVector<Register, 8> ResultRegs;
- if (NumOps >= 2)
- extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
+ // Input operands for each sub-instruction.
+ SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
- if (NumOps >= 3)
- extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
+ int NumParts = ExtractedRegs[0].size();
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ const unsigned NarrowSize = NarrowTy.getSizeInBits();
- for (int i = 0; i < NumParts; ++i) {
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ // We widened the source registers to satisfy merge/unmerge size
+ // constraints. We'll have some extra fully undef parts.
+ const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
- if (NumOps == 1)
- MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
- else if (NumOps == 2) {
- MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
- } else if (NumOps == 3) {
- MIRBuilder.buildInstr(Opc, {DstReg},
- {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
- }
+ for (int I = 0; I != NumRealParts; ++I) {
+ // Emit this instruction on each of the split pieces.
+ for (int J = 0; J != NumOps; ++J)
+ InputRegs[J] = ExtractedRegs[J][I];
- DstRegs.push_back(DstReg);
+ auto Inst = MIRBuilder.buildInstr(Opc, {NarrowTy}, InputRegs, Flags);
+ ResultRegs.push_back(Inst.getReg(0));
}
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ // Fill out the widened result with undef instead of creating instructions
+ // with undef inputs.
+ int NumUndefParts = NumParts - NumRealParts;
+ if (NumUndefParts != 0)
+ ResultRegs.append(NumUndefParts, MIRBuilder.buildUndef(NarrowTy).getReg(0));
+
+ // Extract the possibly padded result to the original result register.
+ buildWidenedRemergeToDst(DstReg, LCMTys[0], ResultRegs);
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
index d5bb17196047..f33d2c21f5a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -75,18 +75,19 @@ body: |
; CHECK-LABEL: name: test_zext_trunc_v3s32_to_v3s16_to_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
- ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0
- ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]]
- ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[AND]](<2 x s32>), 0
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[COPY2]]
- ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[AND1]](s32), 64
- ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>) = G_TRUNC %0
%2:_(<3 x s32>) = G_ZEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
index 366343107fc2..7ccdcb414c28 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
@@ -20,7 +20,7 @@ body: |
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
; WAVE32-LABEL: name: and_s1_vcc_vcc_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
@@ -30,7 +30,7 @@ body: |
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -378,11 +378,11 @@ tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: and_s1_vcc_undef_vcc_undef_vcc
- ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 undef %1:sreg_64, undef %2:sreg_64, implicit-def dead $scc
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 undef %1:sreg_64, undef %2:sreg_64
; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
; WAVE32-LABEL: name: and_s1_vcc_undef_vcc_undef_vcc
; WAVE32: $vcc_hi = IMPLICIT_DEF
- ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 undef %1:sreg_32, undef %2:sreg_32, implicit-def dead $scc
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 undef %1:sreg_32, undef %2:sreg_32
; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
%2:vcc(s1) = G_AND undef %0:vcc(s1), undef %1:vcc(s1)
S_ENDPGM 0, implicit %2
@@ -445,7 +445,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
; WAVE32-LABEL: name: and_s1_vcc_copy_to_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
@@ -456,7 +456,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -489,7 +489,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]]
; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
@@ -501,7 +501,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]]
; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
@@ -535,7 +535,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B64_]]
; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
@@ -547,7 +547,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]]
; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
index 3dd7bf020e50..2f10cd320320 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
@@ -20,7 +20,7 @@ body: |
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
; WAVE32-LABEL: name: or_s1_vcc_vcc_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
@@ -30,7 +30,7 @@ body: |
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -378,11 +378,11 @@ tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: or_s1_vcc_undef_vcc_undef_vcc
- ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 undef %1:sreg_64, undef %2:sreg_64, implicit-def dead $scc
+ ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 undef %1:sreg_64, undef %2:sreg_64
; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
; WAVE32-LABEL: name: or_s1_vcc_undef_vcc_undef_vcc
; WAVE32: $vcc_hi = IMPLICIT_DEF
- ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 undef %1:sreg_32, undef %2:sreg_32, implicit-def dead $scc
+ ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 undef %1:sreg_32, undef %2:sreg_32
; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
%2:vcc(s1) = G_OR undef %0:vcc(s1), undef %1:vcc(s1)
S_ENDPGM 0, implicit %2
@@ -445,7 +445,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
; WAVE32-LABEL: name: or_s1_vcc_copy_to_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
@@ -456,7 +456,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -489,7 +489,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]]
; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
@@ -501,7 +501,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
@@ -535,7 +535,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
@@ -547,7 +547,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]]
; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
index eabd392fa175..c81348aa5d89 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
@@ -20,7 +20,7 @@ body: |
; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
; WAVE32-LABEL: name: xor_s1_vcc_vcc_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
@@ -30,7 +30,7 @@ body: |
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -379,11 +379,11 @@ tracksRegLiveness: true
body: |
bb.0:
; WAVE64-LABEL: name: xor_s1_vcc_undef_vcc_undef_vcc
- ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 undef %1:sreg_64, undef %2:sreg_64, implicit-def dead $scc
+ ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 undef %1:sreg_64, undef %2:sreg_64
; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
; WAVE32-LABEL: name: xor_s1_vcc_undef_vcc_undef_vcc
; WAVE32: $vcc_hi = IMPLICIT_DEF
- ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 undef %1:sreg_32, undef %2:sreg_32, implicit-def dead $scc
+ ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 undef %1:sreg_32, undef %2:sreg_32
; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
%2:vcc(s1) = G_XOR undef %0:vcc(s1), undef %1:vcc(s1)
S_ENDPGM 0, implicit %2
@@ -446,7 +446,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
; WAVE32-LABEL: name: xor_s1_vcc_copy_to_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
@@ -457,7 +457,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
@@ -490,7 +490,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]]
; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
@@ -502,7 +502,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
@@ -536,7 +536,7 @@ body: |
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B64_]]
; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
@@ -548,7 +548,7 @@ body: |
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]]
; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index 98be95cac85b..2986cafedc8f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -242,16 +242,19 @@ body: |
; CHECK-LABEL: name: test_and_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]]
- ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[AND]](<2 x s32>), 0
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[EXTRACT3]]
- ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[AND1]](s32), 64
- ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s32>) = G_AND %0, %1
@@ -287,22 +290,24 @@ body: |
; CHECK-LABEL: name: test_and_v5s32
; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY [[DEF]](<5 x s32>)
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 0
- ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]]
- ; CHECK: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[COPY]], [[AND]](<2 x s32>), 0
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 64
- ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 64
- ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT2]], [[EXTRACT3]]
- ; CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[AND1]](<2 x s32>), 64
- ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<5 x s32>), 128
- ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF1]](<5 x s32>), 128
- ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[EXTRACT5]]
- ; CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]], [[AND2]](s32), 128
- ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[INSERT3:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[INSERT2]](<5 x s32>), 0
- ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT3]](<8 x s32>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+ ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
+ ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+ ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
+ ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
+ ; CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s32>), 0
+ ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[EXTRACT]](<5 x s32>), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
%2:_(<5 x s32>) = G_AND %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
index 2939c599646a..4c38942c5c28 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
@@ -231,7 +231,6 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
@@ -247,34 +246,24 @@ body: |
; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32)
; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
- ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
- ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+ ; GFX8: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[BITCAST]]
+ ; GFX8: [[BSWAP1:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[BITCAST1]]
+ ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BSWAP]](<2 x s16>), [[BSWAP1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX8: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; GFX8: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX8: [[BSWAP:%[0-9]+]]:_(<2 x s16>) = G_BSWAP [[EXTRACT2]]
- ; GFX8: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX8: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[BSWAP]](<2 x s16>), 0
- ; GFX8: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX8: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16)
- ; GFX8: [[BSWAP1:%[0-9]+]]:_(s16) = G_BSWAP [[COPY6]]
- ; GFX8: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT3]](<3 x s16>), 0
- ; GFX8: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT3]], [[BSWAP1]](s16), 32
- ; GFX8: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT4]](<4 x s16>), 0
- ; GFX8: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT4]](<3 x s16>), 0
- ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT5]](<4 x s16>)
+ ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
- ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX8: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
- ; GFX8: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
- ; GFX8: $vgpr0 = COPY [[COPY7]](s32)
- ; GFX8: $vgpr1 = COPY [[COPY8]](s32)
- ; GFX8: $vgpr2 = COPY [[COPY9]](s32)
+ ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX8: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX8: $vgpr0 = COPY [[COPY6]](s32)
+ ; GFX8: $vgpr1 = COPY [[COPY7]](s32)
+ ; GFX8: $vgpr2 = COPY [[COPY8]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
index e872d9e45214..698960ba03eb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -210,57 +210,85 @@ body: |
; SI-LABEL: name: test_fabs_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
- ; SI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; SI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT1]]
- ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0
- ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0
- ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; SI: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; SI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT3]]
- ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
- ; SI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32
- ; SI: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0
- ; SI: S_NOP 0, implicit [[EXTRACT4]](<3 x s16>)
+ ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]]
+ ; SI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]]
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>)
; VI-LABEL: name: test_fabs_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
- ; VI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; VI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT1]]
- ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0
- ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0
- ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; VI: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT3]]
- ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
- ; VI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32
- ; VI: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0
- ; VI: S_NOP 0, implicit [[EXTRACT4]](<3 x s16>)
+ ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]]
+ ; VI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]]
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>)
; GFX9-LABEL: name: test_fabs_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
- ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT1]]
- ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0
- ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0
- ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT3]]
- ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
- ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32
- ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0
- ; GFX9: S_NOP 0, implicit [[EXTRACT4]](<3 x s16>)
+ ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF1]](s32)
+ ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR_TRUNC]]
+ ; GFX9: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>)
+ ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_FABS %0
S_NOP 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
index 967f1093df80..bd92234ccdd1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
@@ -422,25 +422,36 @@ body: |
; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
- ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
- ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[EXTRACT2]], [[EXTRACT3]]
- ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[COPY]](<3 x s16>), 0
- ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FADD]](<2 x s16>), 0
- ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
- ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32
- ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32
- ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[EXTRACT5]], [[EXTRACT6]]
- ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT4]](<3 x s16>), 0
- ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FADD1]](s16), 32
- ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0
- ; GFX9: S_NOP 0, implicit [[EXTRACT7]](<3 x s16>)
+ ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+ ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32)
+ ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+ ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF2]](s32)
+ ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<2 x s16>), [[FADD1]](<2 x s16>), [[DEF3]](<2 x s16>)
+ ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
%2:_(<3 x s16>) = G_FADD %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
index 8253c4549609..0ace2229a69a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
@@ -287,21 +287,25 @@ body: |
; GFX9-LABEL: name: test_fcanonicalize_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
- ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT1]]
- ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0
- ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FCANONICALIZE]](<2 x s16>), 0
- ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT3]]
- ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
- ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FCANONICALIZE1]](s16), 32
- ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0
- ; GFX9: S_NOP 0, implicit [[EXTRACT4]](<3 x s16>)
+ ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF1]](s32)
+ ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]]
+ ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FCANONICALIZE]](<2 x s16>), [[FCANONICALIZE1]](<2 x s16>), [[DEF2]](<2 x s16>)
+ ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_FCANONICALIZE %0
S_NOP 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
index 3d079e1701b7..e37d48787d10 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
@@ -512,29 +512,47 @@ body: |
; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0
- ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
- ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0
- ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[EXTRACT3]], [[EXTRACT4]], [[EXTRACT5]]
- ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[COPY]](<3 x s16>), 0
- ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT3]], [[FMA]](<2 x s16>), 0
- ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT4]](<4 x s16>), 0
- ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32
- ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9: [[EXTRACT8:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT6]](<4 x s16>), 32
- ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0
- ; GFX9: [[EXTRACT9:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT7]](<4 x s16>), 32
- ; GFX9: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[EXTRACT7]], [[EXTRACT8]], [[EXTRACT9]]
- ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT6]](<3 x s16>), 0
- ; GFX9: [[INSERT9:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT8]], [[FMA1]](s16), 32
- ; GFX9: [[EXTRACT10:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT9]](<4 x s16>), 0
- ; GFX9: S_NOP 0, implicit [[EXTRACT10]](<3 x s16>)
+ ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+ ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF3]](s32)
+ ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
+ ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF3]](s32)
+ ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0
+ ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>)
+ ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
+ ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
+ ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
+ ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32)
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF3]](s32)
+ ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]]
+ ; GFX9: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]]
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>), [[DEF4]](<2 x s16>)
+ ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; GFX9: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
%2:_(<3 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
index ad9919f5e996..77c8cf402bb7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
@@ -421,25 +421,36 @@ body: |
; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
- ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
- ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[EXTRACT2]], [[EXTRACT3]]
- ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[COPY]](<3 x s16>), 0
- ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FMUL]](<2 x s16>), 0
- ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
- ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32
- ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0
- ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32
- ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[EXTRACT5]], [[EXTRACT6]]
- ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT4]](<3 x s16>), 0
- ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FMUL1]](s16), 32
- ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0
- ; GFX9: S_NOP 0, implicit [[EXTRACT7]](<3 x s16>)
+ ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+ ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32)
+ ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+ ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+ ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32)
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF2]](s32)
+ ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
+ ; GFX9: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMUL]](<2 x s16>), [[FMUL1]](<2 x s16>), [[DEF3]](<2 x s16>)
+ ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
%2:_(<3 x s16>) = G_FMUL %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
index 815572df870f..8e8653b24645 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
@@ -208,57 +208,85 @@ body: |
; SI-LABEL: name: test_fneg_v3s16
; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
- ; SI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; SI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT1]]
- ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0
- ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0
- ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; SI: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; SI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT3]]
- ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
- ; SI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32
- ; SI: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0
- ; SI: S_NOP 0, implicit [[EXTRACT4]](<3 x s16>)
+ ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]]
+ ; SI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]]
+ ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>)
; VI-LABEL: name: test_fneg_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
- ; VI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; VI: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT1]]
- ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0
- ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0
- ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; VI: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT3]]
- ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
- ; VI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32
- ; VI: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0
- ; VI: S_NOP 0, implicit [[EXTRACT4]](<3 x s16>)
+ ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
+ ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]]
+ ; VI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]]
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF1]](<2 x s16>)
+ ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>)
; GFX9-LABEL: name: test_fneg_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
- ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[EXTRACT]](<3 x s16>)
- ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
- ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT1]]
- ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0
- ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0
- ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
- ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
- ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
- ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT3]]
- ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
- ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32
- ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0
- ; GFX9: S_NOP 0, implicit [[EXTRACT4]](<3 x s16>)
+ ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+ ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
+ ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF1]](s32)
+ ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR_TRUNC]]
+ ; GFX9: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR_TRUNC1]]
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>)
+ ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
+ ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_FNEG %0
S_NOP 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
index 9e966d1e6e6d..52f91d366fc0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -242,16 +242,19 @@ body: |
; CHECK-LABEL: name: test_or_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[EXTRACT]], [[EXTRACT1]]
- ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[OR]](<2 x s32>), 0
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT2]], [[EXTRACT3]]
- ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR1]](s32), 64
- ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s32>) = G_OR %0, %1
@@ -287,22 +290,24 @@ body: |
; CHECK-LABEL: name: test_or_v5s32
; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY [[DEF]](<5 x s32>)
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 0
- ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[EXTRACT]], [[EXTRACT1]]
- ; CHECK: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[COPY]], [[OR]](<2 x s32>), 0
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 64
- ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 64
- ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[EXTRACT2]], [[EXTRACT3]]
- ; CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[OR1]](<2 x s32>), 64
- ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<5 x s32>), 128
- ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF1]](<5 x s32>), 128
- ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[EXTRACT4]], [[EXTRACT5]]
- ; CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]], [[OR2]](s32), 128
- ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[INSERT3:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[INSERT2]](<5 x s32>), 0
- ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT3]](<8 x s32>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+ ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
+ ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+ ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
+ ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
+ ; CHECK: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s32>), 0
+ ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[EXTRACT]](<5 x s32>), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
%2:_(<5 x s32>) = G_OR %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index 30911ac8240f..1d4567db68cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -242,16 +242,19 @@ body: |
; CHECK-LABEL: name: test_xor_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
- ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
- ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[EXTRACT]], [[EXTRACT1]]
- ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[XOR]](<2 x s32>), 0
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
- ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
- ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT2]], [[EXTRACT3]]
- ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[XOR1]](s32), 64
- ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
+ ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
%2:_(<3 x s32>) = G_XOR %0, %1
@@ -287,22 +290,24 @@ body: |
; CHECK-LABEL: name: test_xor_v5s32
; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY [[DEF]](<5 x s32>)
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 0
- ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[EXTRACT]], [[EXTRACT1]]
- ; CHECK: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[COPY]], [[XOR]](<2 x s32>), 0
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 64
- ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 64
- ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[EXTRACT2]], [[EXTRACT3]]
- ; CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[XOR1]](<2 x s32>), 64
- ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<5 x s32>), 128
- ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF1]](<5 x s32>), 128
- ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT4]], [[EXTRACT5]]
- ; CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]], [[XOR2]](s32), 128
- ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[INSERT3:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[INSERT2]](<5 x s32>), 0
- ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT3]](<8 x s32>)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+ ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
+ ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
+ ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+ ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
+ ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
+ ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
+ ; CHECK: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s32>), 0
+ ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[EXTRACT]](<5 x s32>), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
%2:_(<5 x s32>) = G_XOR %0, %1