[llvm] [AArch64][GlobalISel] Combine G_EXTRACT_VECTOR_ELT and G_BUILD_VECTOR sequences into G_SHUFFLE_VECTOR (PR #110545)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 05:47:58 PDT 2024
https://github.com/ValentijnvdBeek updated https://github.com/llvm/llvm-project/pull/110545
From 76d4bdab54655ccaafd40575d12e4d12da81b824 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek <valentijn at posteo.net>
Date: Sat, 28 Sep 2024 01:24:29 +0200
Subject: [PATCH 1/7] [GlobalISel] Remove inaccurate input vector restriction
In the buildShuffleVector method, there is an assertion that the
combined element count of the two input vectors is at least as large
as the mask. However, that restriction does not match how the shuffle
vector instruction is actually used in our test suite.
For example:
shuffle_concat_1 in combine_shuffle_vector.mir: 4xs8 -> 16xs8
v3s8_crash in legalize_insert_vector_elt: 3xs8 -> 12xs8
shuffle_vector_to_concat_vector_45670123 in prelegalizercombiner-shuffle-vector: 4xs32 -> 12xs32
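For reference, here is a minimal standalone sketch of the shuffle semantics
being relied on (plain C++ for illustration only, not the LLVM API): each
mask entry indexes into the notional concatenation of the two sources, so
nothing forces the mask, and therefore the destination, to be shorter than
the sources combined.
  #include <cassert>
  #include <cstdio>
  #include <vector>

  // Evaluate a shuffle: Mask[i] indexes into concat(Src1, Src2); -1 is undef.
  // The mask (and therefore the destination) may be longer than both sources
  // combined, as in the 4xs8 -> 16xs8 cases listed above.
  std::vector<int> shuffle(const std::vector<int> &Src1,
                           const std::vector<int> &Src2,
                           const std::vector<int> &Mask) {
    const int N1 = (int)Src1.size(), N2 = (int)Src2.size();
    std::vector<int> Dst;
    for (int Idx : Mask) {
      if (Idx < 0) {          // undef lane; any value is fine
        Dst.push_back(0);
        continue;
      }
      assert(Idx < N1 + N2 && "mask entry out of range");
      Dst.push_back(Idx < N1 ? Src1[Idx] : Src2[Idx - N1]);
    }
    return Dst;               // Dst.size() == Mask.size()
  }

  int main() {
    std::vector<int> A{1, 2}, B{3, 4};
    // Eight mask entries over four source elements: a 2x -> 8x widening.
    for (int V : shuffle(A, B, {0, 1, 2, 3, 0, 1, 2, 3}))
      printf("%d ", V);       // prints: 1 2 3 4 1 2 3 4
    printf("\n");
    return 0;
  }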
---
llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 59f2fc633f5de7..1ddecefa173838 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -772,8 +772,6 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
LLT DstTy = Res.getLLTTy(*getMRI());
LLT Src1Ty = Src1.getLLTTy(*getMRI());
LLT Src2Ty = Src2.getLLTTy(*getMRI());
- assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
- Mask.size());
assert(DstTy.getElementType() == Src1Ty.getElementType() &&
DstTy.getElementType() == Src2Ty.getElementType());
(void)DstTy;
From 0d5d582b453be6d5f2023b6db2ded874971a8900 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek <valentijn at posteo.net>
Date: Sat, 5 Oct 2024 17:38:02 +0200
Subject: [PATCH 2/7] [GISel] Factor out the mask matching code from the
shufflevector combiner
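The check being factored out asks whether a destination-sized mask decomposes
into DstNumElts/SrcNumElts runs, each of which reads one whole source vector
in order. A standalone C++ sketch of that test, mirroring the loop in the
diff below (illustrative only, not the LLVM code itself):
  #include <cstdio>
  #include <vector>

  // Returns the per-run source ids (-1 undef, 0 first source, 1 second) if
  // Mask is a concatenation of whole SrcNumElts-sized vectors, else empty.
  std::vector<int> matchConcatMask(const std::vector<int> &Mask,
                                   unsigned SrcNumElts) {
    if (Mask.size() % SrcNumElts != 0)
      return {};
    std::vector<int> ConcatSrcs(Mask.size() / SrcNumElts, -1);
    for (unsigned I = 0; I != Mask.size(); ++I) {
      int Idx = Mask[I];
      if (Idx < 0)
        continue;                          // undef lane, any source works
      if ((unsigned)Idx % SrcNumElts != I % SrcNumElts)
        return {};                         // not reading elements in order
      int Src = Idx / SrcNumElts;          // which source this run reads
      int &Slot = ConcatSrcs[I / SrcNumElts];
      if (Slot >= 0 && Slot != Src)
        return {};                         // one run mixes two sources
      Slot = Src;
    }
    return ConcatSrcs;
  }

  int main() {
    // Mask (4,5,6,7,0,1,2,3) over 4-element sources -> concat(src2, src1).
    for (int S : matchConcatMask({4, 5, 6, 7, 0, 1, 2, 3}, 4))
      printf("%d ", S);                    // prints: 1 0
    printf("\n");
    return 0;
  }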
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 11 +++
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 82 +++++++++++--------
2 files changed, 58 insertions(+), 35 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 76d51ab819f441..b503a53dd98ca8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -249,6 +249,17 @@ class CombinerHelper {
/// or an implicit_def if \p Ops is empty.
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector<Register> &Ops);
+ /// Check if an instruction whose operations can be represented
+ /// by a vector mask can be replaced by a concat_vectors.
+ /// \p Ops will contain the operands to produce the flattened
+ /// concat_vectors.
+ /// \p Mask is an array of numbers that represent the order in which
+ /// the elements of \p SrcRegs will be put into \p DstReg.
+ bool matchVectorMaskSequence(MachineInstr &MI, SmallVectorImpl<Register> &Ops,
+ const Register DstReg,
+ const std::pair<Register, Register> SrcRegs,
+ ArrayRef<int> Mask);
+
/// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
/// Returns true if MI changed.
///
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c279289f9161bf..d523af15486c97 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -40,6 +40,7 @@
#include <cmath>
#include <optional>
#include <tuple>
+#include <utility>
#define DEBUG_TYPE "gi-combiner"
@@ -472,39 +473,16 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
return false;
}
-bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
- SmallVectorImpl<Register> &Ops) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
- "Invalid instruction kind");
- LLT DstType = MRI.getType(MI.getOperand(0).getReg());
- Register Src1 = MI.getOperand(1).getReg();
- LLT SrcType = MRI.getType(Src1);
- // As bizarre as it may look, shuffle vector can actually produce
- // scalar! This is because at the IR level a <1 x ty> shuffle
- // vector is perfectly valid.
- unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
- unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
-
- // If the resulting vector is smaller than the size of the source
- // vectors being concatenated, we won't be able to replace the
- // shuffle vector into a concat_vectors.
- //
- // Note: We may still be able to produce a concat_vectors fed by
- // extract_vector_elt and so on. It is less clear that would
- // be better though, so don't bother for now.
- //
- // If the destination is a scalar, the size of the sources doesn't
- // matter. we will lower the shuffle to a plain copy. This will
- // work only if the source and destination have the same size. But
- // that's covered by the next condition.
- //
- // TODO: If the size between the source and destination don't match
- // we could still emit an extract vector element in that case.
- if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
- return false;
+bool CombinerHelper::matchVectorMaskSequence(
+ MachineInstr &MI, SmallVectorImpl<Register> &Ops, const Register DstReg,
+ const std::pair<Register, Register> SrcRegs, ArrayRef<int> Mask) {
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcRegs.first);
// Check that the shuffle mask can be broken evenly between the
// different sources.
+ const unsigned DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+ const unsigned SrcNumElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
if (DstNumElts % SrcNumElts != 0)
return false;
@@ -513,7 +491,6 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
// vectors.
unsigned NumConcat = DstNumElts / SrcNumElts;
SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
for (unsigned i = 0; i != DstNumElts; ++i) {
int Idx = Mask[i];
// Undef value.
@@ -532,21 +509,56 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
// The shuffle is concatenating multiple vectors together.
// Collect the different operands for that.
Register UndefReg;
- Register Src2 = MI.getOperand(2).getReg();
for (auto Src : ConcatSrcs) {
if (Src < 0) {
if (!UndefReg) {
Builder.setInsertPt(*MI.getParent(), MI);
- UndefReg = Builder.buildUndef(SrcType).getReg(0);
+ UndefReg = Builder.buildUndef(SrcTy).getReg(0);
}
Ops.push_back(UndefReg);
} else if (Src == 0)
- Ops.push_back(Src1);
+ Ops.push_back(SrcRegs.first);
else
- Ops.push_back(Src2);
+ Ops.push_back(SrcRegs.second);
}
return true;
}
+bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
+ SmallVectorImpl<Register> &Ops) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Invalid instruction kind");
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT SrcType = MRI.getType(Src1);
+ // As bizarre as it may look, shuffle vector can actually produce
+ // a scalar! This is because at the IR level a <1 x ty> shuffle
+ // vector is perfectly valid.
+ unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
+ unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
+
+ // If the resulting vector is smaller than the size of the source
+ // vectors being concatenated, we won't be able to replace the
+ // shuffle vector into a concat_vectors.
+ //
+ // Note: We may still be able to produce a concat_vectors fed by
+ // extract_vector_elt and so on. It is less clear that would
+ // be better though, so don't bother for now.
+ //
+ // If the destination is a scalar, the size of the sources doesn't
+ // matter. We will lower the shuffle to a plain copy. This will
+ // work only if the source and destination have the same size. But
+ // that's covered by the next condition.
+ //
+ // TODO: If the sizes of the source and destination don't match
+ // we could still emit an extract vector element in that case.
+ if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
+ return false;
+
+ return matchVectorMaskSequence(
+ MI, Ops, MI.getOperand(0).getReg(),
+ std::make_pair(MI.getOperand(1).getReg(), MI.getOperand(2).getReg()),
+ MI.getOperand(3).getShuffleMask());
+}
void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
const ArrayRef<Register> Ops) {
From 1c9d534be76ce944d54c5f166c0c93b176187fe9 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek <valentijn at posteo.net>
Date: Sat, 28 Sep 2024 02:05:49 +0200
Subject: [PATCH 3/7] [AArch64][GlobalISel] Combine G_EXTRACT_VECTOR_ELT and
G_BUILD_VECTOR sequences into G_SHUFFLE_VECTOR
This combine tries to find build vectors whose source elements all originate from a G_EXTRACT_VECTOR_ELT on one or two donor vectors. One example where this may happen is on AI chips, which perform a lot of matrix multiplications: typically these vectors are dissected and then rearranged into the right transformation.
E.g.
%donor1(<2 x s32>) = COPY $d0
%donor2(<2 x s32>) = COPY $d1
%ext1 = G_EXTRACT_VECTOR_ELT %donor1, 0
%ext2 = G_EXTRACT_VECTOR_ELT %donor1, 1
%ext3 = G_EXTRACT_VECTOR_ELT %donor2, 0
%ext4 = G_EXTRACT_VECTOR_ELT %donor2, 1
%vector = G_BUILD_VECTOR %ext1, %ext2, %ext3, %ext4
==>
replace with: %vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
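The mask construction itself is mechanical: an element extracted from the
first donor keeps its index, while an element from the second donor has its
index offset by the first donor's element count. A standalone C++ sketch of
that step (identifiers here are illustrative, not the LLVM API):
  #include <cstdio>
  #include <utility>
  #include <vector>

  // Build a shuffle mask from (donor id, element index) pairs, where donor 0
  // and donor 1 both have VectorSize elements.
  std::vector<int> buildShuffleMask(
      const std::vector<std::pair<int, int>> &Extracts, unsigned VectorSize) {
    std::vector<int> Mask;
    for (auto [Donor, Idx] : Extracts)
      Mask.push_back(Donor == 0 ? Idx : Idx + (int)VectorSize);
    return Mask;
  }

  int main() {
    // The extracts from the example above: donor1[0..1] then donor2[0..1].
    for (int M : buildShuffleMask({{0, 0}, {0, 1}, {1, 0}, {1, 1}}, 2))
      printf("%d ", M);                    // prints: 0 1 2 3
    printf("\n");
    return 0;
  }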
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 9 +
.../include/llvm/Target/GlobalISel/Combine.td | 13 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 92 ++++++
.../GlobalISel/combine-build-vector.mir | 302 ++++++++++++++++++
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 104 +-----
llvm/test/CodeGen/AArch64/arm64-rev.ll | 9 +-
6 files changed, 437 insertions(+), 92 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index b503a53dd98ca8..5460b730f40912 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -591,6 +591,15 @@ class CombinerHelper {
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
+ /// Combine a build vector whose elements are all extracts from up to two
+ /// source vectors into a shuffle vector.
+ bool matchCombineExtractToShuffle(
+ MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+ std::pair<Register, Register> &VectorRegisters);
+ void applyCombineExtractToShuffle(
+ MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+ std::pair<Register, Register> &VectorRegisters);
+
bool matchExtractAllEltsFromBuildVector(
MachineInstr &MI,
SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f838c6e62a2ce3..0525bfe1b0ddb2 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -905,6 +905,16 @@ def extract_vec_elt_build_vec : GICombineRule<
[{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
+def extract_vector_register_to_id_mapping_matchinfo :
+ GIDefMatchData<"SmallVector<std::pair<Register, int>>">;
+def vector_reg_pair_matchinfo :
+ GIDefMatchData<"std::pair<Register, Register>">;
+def extract_vector_element_build_vector_to_shuffle_vector : GICombineRule<
+ (defs root:$root, extract_vector_register_to_id_mapping_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair),
+ (match (wip_match_opcode G_BUILD_VECTOR):$root,
+ [{ return Helper.matchCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }]),
+ (apply [{ Helper.applyCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }])>;
+
// Fold away full elt extracts from a build_vector.
def extract_all_elts_from_build_vector_matchinfo :
GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr*>>">;
@@ -916,7 +926,8 @@ def extract_all_elts_from_build_vector : GICombineRule<
def extract_vec_elt_combines : GICombineGroup<[
extract_vec_elt_build_vec,
- extract_all_elts_from_build_vector]>;
+ extract_all_elts_from_build_vector,
+ extract_vector_element_build_vector_to_shuffle_vector]>;
def funnel_shift_from_or_shift : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d523af15486c97..3e07c9a7432684 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
+#include <llvm/ADT/SmallVector.h>
#include <optional>
#include <tuple>
#include <utility>
@@ -4217,6 +4218,97 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
replaceSingleDefInstWithReg(MI, Reg);
}
+bool CombinerHelper::matchCombineExtractToShuffle(
+ MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &VecIndexPair,
+ std::pair<Register, Register> &VectorRegisters) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ const GBuildVector *Build = cast<GBuildVector>(&MI);
+ // This combine tries to find all the build vectors whose source elements
+ // all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.
+ // One example where this may happen is for AI chips where there are a lot
+ // of matrix multiplications. Typically these vectors are dissected and then
+ // rearranged into the right transformation.
+ // E.g.
+ // %donor1(<2 x s32>) = COPY $d0
+ // %donor2(<2 x s32>) = COPY $d1
+ // %ext1 = G_EXTRACT_VECTOR_ELT %donor1, 0
+ // %ext2 = G_EXTRACT_VECTOR_ELT %donor1, 1
+ // %ext3 = G_EXTRACT_VECTOR_ELT %donor2, 0
+ // %ext4 = G_EXTRACT_VECTOR_ELT %donor2, 1
+ // %vector = G_BUILD_VECTOR %ext1, %ext2, %ext3, %ext4
+ // ==>
+ // replace with:
+ // %vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
+ SmallSetVector<Register, 2> RegisterVector;
+ const unsigned NumElements = Build->getNumSources();
+ for (unsigned Index = 0; Index < NumElements; Index++) {
+ Register SrcReg = peekThroughBitcast(Build->getSourceReg(Index), MRI);
+ auto *ExtractInstr = getOpcodeDef<GExtractVectorElement>(SrcReg, MRI);
+ if (!ExtractInstr)
+ return false;
+
+ // For shufflemasks we need to know exactly what index to place each
+ // element, so if this build vector doesn't use exclusively constants
+ // then we can't replace it with a shufflevector.
+ auto Cst = getIConstantVRegVal(ExtractInstr->getIndexReg(), MRI);
+ if (!Cst)
+ return false;
+ unsigned Idx = Cst->getZExtValue();
+
+ Register VectorReg = ExtractInstr->getVectorReg();
+ RegisterVector.insert(VectorReg);
+ VecIndexPair.emplace_back(std::make_pair(VectorReg, Idx));
+ }
+
+ // Create a pair so that we don't need to look for them later. This code is
+ // incorrect if we have more than two vectors in the set. Since we can only
+ // put two vectors in a shuffle, we reject any solution with more than two
+ // anyway.
+ VectorRegisters =
+ std::make_pair(RegisterVector.front(), RegisterVector.back());
+
+ // We check that they're the same type before running. We can also grow the
+ // smaller one to the target size, but there isn't an elegant way to do that
+ // until we have a good lowering for G_EXTRACT_SUBVECTOR.
+ if (MRI.getType(VectorRegisters.first) != MRI.getType(VectorRegisters.second))
+ return false;
+
+ return RegisterVector.size() <= 2;
+}
+
+void CombinerHelper::applyCombineExtractToShuffle(
+ MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+ std::pair<Register, Register> &VectorRegisters) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+
+ const Register FirstRegister = VectorRegisters.first;
+ const LLT FirstRegisterType = MRI.getType(FirstRegister);
+ const unsigned VectorSize = FirstRegisterType.getNumElements();
+ SmallVector<int, 32> ShuffleMask;
+ for (auto &Pair : MatchInfo) {
+ const Register VectorReg = Pair.first;
+ int Idx = Pair.second;
+
+ if (VectorReg != VectorRegisters.first) {
+ Idx += VectorSize;
+ }
+ ShuffleMask.emplace_back(Idx);
+ }
+
+ // We could reuse the same vector register and shuffle them both together
+ // but it is nicer for later optimizations to explicitely make it undef.
+ const GBuildVector *BuildVector = cast<GBuildVector>(&MI);
+ Register SecondRegister = VectorRegisters.second;
+ if (FirstRegister == SecondRegister) {
+ SecondRegister = MRI.createGenericVirtualRegister(FirstRegisterType);
+ Builder.buildUndef(SecondRegister);
+ }
+
+ Builder.buildShuffleVector(BuildVector->getOperand(0), FirstRegister,
+ SecondRegister, ShuffleMask);
+ MI.eraseFromParent();
+}
+
bool CombinerHelper::matchExtractAllEltsFromBuildVector(
MachineInstr &MI,
SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 93f6051c3bd3b7..3cc836b9718297 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -151,3 +151,305 @@ body: |
RET_ReallyLR implicit $x0
...
+---
+name: reverse_concat_buildvector_shuffle
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 7, 6, 5, 4)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+ RET_ReallyLR implicit %18
+...
+---
+name: reverse_interweave_buildvector_shuffle
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_interweave_buildvector_shuffle
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4, 7, 2, 5, 0)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+ RET_ReallyLR implicit %18
+...
+
+---
+name: reverse_interweave_same_size_as_dest_buildvector_shuffle
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_interweave_same_size_as_dest_buildvector_shuffle
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %14:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
+ RET_ReallyLR implicit %14
+...
+---
+name: reverse_interweave_half_size_as_dest_buildvector_shuffle
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_interweave_half_size_as_dest_buildvector_shuffle
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 4)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<2 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_
+ RET_ReallyLR implicit %12
+...
+---
+name: reverse_concat_single_buildvector_shuffle
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_concat_single_buildvector_shuffle
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(3, 1, 0, 2)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = G_CONSTANT i64 2
+ %4:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
+ RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_double_buildvector_shuffle
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_concat_double_buildvector_shuffle
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 6, 4, 5, 7, 1, 0, 2, 0, 5, 4, 1, 7)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<16 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+ %18:_(<16 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_, %12:_, %13:_, %11:_, %13:_, %16:_, %15:_, %12:_, %17:_
+ RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buildvector_shuffle_three_sources
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1, $q2
+ ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_three_sources
+ ; CHECK: liveins: $q0, $q1, $q2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC1]](s32), [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC1]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<4 x s32>) = COPY $q2
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %4:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %3:_(s64)
+ %18:_(<8 x s32>) = G_BUILD_VECTOR %12:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %12:_
+ RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buildvector_shuffle_different_element_size
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $d0
+ ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_element_size
+ ; CHECK: liveins: $q0, $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[DEF]](s32), [[DEF]](s32), [[EVEC1]](s32), [[EVEC2]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<2 x s32>) = COPY $d0
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %5:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %4:_(s64)
+ %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %11:_, %12:_
+ RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buildvector_shuffle_different_type
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_type
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<8 x s16>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %5:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %4:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %3:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %2:_(s64)
+ %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+ RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buildvector_shuffle_non_constant_id
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $d0, $q0, $q1
+ ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_non_constant_id
+ ; CHECK: liveins: $d0, $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[COPY2]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[COPY2]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = COPY $d0
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_
+ RET_ReallyLR implicit %12
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index c0d91c1e0c836b..07cb5379a075c2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1351,30 +1351,10 @@ define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
}
define <8 x i8> @getl(<16 x i8> %x) #0 {
-; CHECK-SD-LABEL: getl:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: getl:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov v1.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov v1.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[3]
-; CHECK-GI-NEXT: mov v1.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[4]
-; CHECK-GI-NEXT: mov v1.b[3], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[5]
-; CHECK-GI-NEXT: mov v1.b[4], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[6]
-; CHECK-GI-NEXT: mov b0, v0.b[7]
-; CHECK-GI-NEXT: mov v1.b[5], v2.b[0]
-; CHECK-GI-NEXT: mov v1.b[6], v3.b[0]
-; CHECK-GI-NEXT: mov v1.b[7], v0.b[0]
-; CHECK-GI-NEXT: fmov d0, d1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: getl:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
%vecext = extractelement <16 x i8> %x, i32 0
%vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <16 x i8> %x, i32 1
@@ -1923,49 +1903,12 @@ entry:
}
define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
-; CHECK-SD-LABEL: test_concat_v16i8_v8i8_v8i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b3, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov v2.b[1], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov v2.b[3], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[5]
-; CHECK-GI-NEXT: mov v2.b[4], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[6]
-; CHECK-GI-NEXT: mov b0, v0.b[7]
-; CHECK-GI-NEXT: mov v2.b[5], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[2]
-; CHECK-GI-NEXT: mov v2.b[6], v4.b[0]
-; CHECK-GI-NEXT: mov v2.b[7], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[1]
-; CHECK-GI-NEXT: mov v2.b[8], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[9], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[3]
-; CHECK-GI-NEXT: mov v2.b[10], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[4]
-; CHECK-GI-NEXT: mov v2.b[11], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[5]
-; CHECK-GI-NEXT: mov v2.b[12], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[6]
-; CHECK-GI-NEXT: mov v2.b[13], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[7]
-; CHECK-GI-NEXT: mov v2.b[14], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[15], v0.b[0]
-; CHECK-GI-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ret
entry:
%vecext = extractelement <8 x i8> %x, i32 0
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
@@ -2094,27 +2037,12 @@ entry:
}
define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
-; CHECK-SD-LABEL: test_concat_v8i16_v4i16_v4i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov v2.h[0], v0.h[0]
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov v2.h[1], v0.h[1]
-; CHECK-GI-NEXT: mov v2.h[2], v0.h[2]
-; CHECK-GI-NEXT: mov v2.h[3], v0.h[3]
-; CHECK-GI-NEXT: mov v2.h[4], v1.h[0]
-; CHECK-GI-NEXT: mov v2.h[5], v1.h[1]
-; CHECK-GI-NEXT: mov v2.h[6], v1.h[2]
-; CHECK-GI-NEXT: mov v2.h[7], v1.h[3]
-; CHECK-GI-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ret
entry:
%vecext = extractelement <4 x i16> %x, i32 0
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f548a0e01feee6..a728836fb05585 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -397,10 +397,13 @@ define void @test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp
;
; CHECK-GI-LABEL: test_vrev64:
; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI27_0
; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: add x8, x1, #2
-; CHECK-GI-NEXT: st1.h { v0 }[6], [x1]
-; CHECK-GI-NEXT: st1.h { v0 }[5], [x8]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: mov h1, v0[1]
+; CHECK-GI-NEXT: str h0, [x1]
+; CHECK-GI-NEXT: str h1, [x1, #2]
; CHECK-GI-NEXT: ret
entry:
%tmp2 = load <8 x i16>, ptr %source, align 4
From 11d336c09fef58c2309fa246fe086e9a1f4335b4 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek <valentijn at posteo.net>
Date: Tue, 1 Oct 2024 15:36:21 +0200
Subject: [PATCH 4/7] Fixup! Remove redundant G_BUILD_VECTOR assert
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3e07c9a7432684..c1d9dddb45bcbd 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4221,7 +4221,6 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
bool CombinerHelper::matchCombineExtractToShuffle(
MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &VecIndexPair,
std::pair<Register, Register> &VectorRegisters) {
- assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
const GBuildVector *Build = cast<GBuildVector>(&MI);
// This combine tries to find all the build vectors whose source elements
// all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.
From 549bd183b2e5f68b418472ed259d0fcefead3c4d Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek <valentijn at posteo.net>
Date: Tue, 1 Oct 2024 21:44:59 +0200
Subject: [PATCH 5/7] Fixup! Add additional tests
---
.../GlobalISel/combine-build-vector.mir | 74 +++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 3cc836b9718297..3abb334bb5b813 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -453,3 +453,77 @@ body: |
%12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_
RET_ReallyLR implicit %12
...
+---
+name: reverse_concat_buildvector_shuffle_other_sources
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_other_sources
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[C2]](s32), [[EVEC2]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s32>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %12:_(s32) = G_CONSTANT i32 42
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
+ RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buildvector_shuffle_trunc
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_trunc
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR_TRUNC]](<8 x s16>)
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(s64) = G_CONSTANT i64 2
+ %5:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %18:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+ RET_ReallyLR implicit %18
+...
From 7ad6fd3d3324cd88e6668867cf175134d375c615 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek <valentijn at posteo.net>
Date: Tue, 1 Oct 2024 21:46:25 +0200
Subject: [PATCH 6/7] Fixup! Create undef using buildUndef
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c1d9dddb45bcbd..465faec6d5187d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4295,12 +4295,11 @@ void CombinerHelper::applyCombineExtractToShuffle(
}
// We could reuse the same vector register and shuffle them both together
- // but it is nicer for later optimizations to explicitely make it undef.
+ // but it is nicer for later optimizations to explicitly make it undef.
const GBuildVector *BuildVector = cast<GBuildVector>(&MI);
Register SecondRegister = VectorRegisters.second;
if (FirstRegister == SecondRegister) {
- SecondRegister = MRI.createGenericVirtualRegister(FirstRegisterType);
- Builder.buildUndef(SecondRegister);
+ SecondRegister = Builder.buildUndef(FirstRegisterType).getReg(0);
}
Builder.buildShuffleVector(BuildVector->getOperand(0), FirstRegister,
From 4ecb536f9f6a933d4fddc62b58fd489d894a5ea7 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek <valentijn at posteo.net>
Date: Fri, 11 Oct 2024 13:48:01 +0200
Subject: [PATCH 7/7] fixup! Directly run the shuffle vector analysis code
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 9 +-
.../include/llvm/Target/GlobalISel/Combine.td | 12 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 57 +--
.../GlobalISel/combine-build-vector.mir | 339 ++++++++----------
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 28 +-
llvm/test/CodeGen/AArch64/arm64-rev.ll | 9 +-
6 files changed, 198 insertions(+), 256 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5460b730f40912..30d5a9ebd18aa4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -593,12 +593,9 @@ class CombinerHelper {
/// Combine a build vector whose elements are all extracts from up to two
/// source vectors into a shuffle vector.
- bool matchCombineExtractToShuffle(
- MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
- std::pair<Register, Register> &VectorRegisters);
- void applyCombineExtractToShuffle(
- MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
- std::pair<Register, Register> &VectorRegisters);
+ bool
+ matchCombineExtractToShuffle(MachineInstr &MI, SmallVectorImpl<Register> &Ops,
+ std::pair<Register, Register> &VectorRegisters);
bool matchExtractAllEltsFromBuildVector(
MachineInstr &MI,
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 0525bfe1b0ddb2..256dcc2815546b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -905,15 +905,15 @@ def extract_vec_elt_build_vec : GICombineRule<
[{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
-def extract_vector_register_to_id_mapping_matchinfo :
- GIDefMatchData<"SmallVector<std::pair<Register, int>>">;
+def extract_vector_register_sources_matchinfo :
+ GIDefMatchData<"SmallVector<Register>">;
def vector_reg_pair_matchinfo :
GIDefMatchData<"std::pair<Register, Register>">;
def extract_vector_element_build_vector_to_shuffle_vector : GICombineRule<
- (defs root:$root, extract_vector_register_to_id_mapping_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair),
+ (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair),
(match (wip_match_opcode G_BUILD_VECTOR):$root,
[{ return Helper.matchCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }]),
- (apply [{ Helper.applyCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }])>;
+ (apply [{ Helper.applyCombineShuffleVector(*${root}, ${matchinfo}); }])>;
// Fold away full elt extracts from a build_vector.
def extract_all_elts_from_build_vector_matchinfo :
@@ -926,8 +926,8 @@ def extract_all_elts_from_build_vector : GICombineRule<
def extract_vec_elt_combines : GICombineGroup<[
extract_vec_elt_build_vec,
- extract_all_elts_from_build_vector,
- extract_vector_element_build_vector_to_shuffle_vector]>;
+ extract_vector_element_build_vector_to_shuffle_vector,
+ extract_all_elts_from_build_vector]>;
def funnel_shift_from_or_shift : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 465faec6d5187d..6856ea42cb30f6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4219,9 +4219,12 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
}
bool CombinerHelper::matchCombineExtractToShuffle(
- MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &VecIndexPair,
+ MachineInstr &MI, SmallVectorImpl<Register> &Ops,
std::pair<Register, Register> &VectorRegisters) {
const GBuildVector *Build = cast<GBuildVector>(&MI);
+ const unsigned SrcNumElts =
+ MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+
// This combine tries to find all the build vectors whose source elements
// all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.
// One example where this may happen is for AI chips where there are a lot
@@ -4239,6 +4242,7 @@ bool CombinerHelper::matchCombineExtractToShuffle(
// replace with:
// %vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
SmallSetVector<Register, 2> RegisterVector;
+ SmallVector<int, 32> VectorMask;
const unsigned NumElements = Build->getNumSources();
for (unsigned Index = 0; Index < NumElements; Index++) {
Register SrcReg = peekThroughBitcast(Build->getSourceReg(Index), MRI);
@@ -4246,17 +4250,21 @@ bool CombinerHelper::matchCombineExtractToShuffle(
if (!ExtractInstr)
return false;
+ RegisterVector.insert(ExtractInstr->getVectorReg());
+
// For shufflemasks we need to know exactly what index to place each
// element, so if this build vector doesn't use exclusively constants
// then we can't replace it with a shufflevector.
auto Cst = getIConstantVRegVal(ExtractInstr->getIndexReg(), MRI);
if (!Cst)
return false;
+
unsigned Idx = Cst->getZExtValue();
+ if (ExtractInstr->getVectorReg() != RegisterVector.front()) {
+ Idx += SrcNumElts;
+ }
- Register VectorReg = ExtractInstr->getVectorReg();
- RegisterVector.insert(VectorReg);
- VecIndexPair.emplace_back(std::make_pair(VectorReg, Idx));
+ VectorMask.emplace_back(Idx);
}
// Create a pair so that we don't need to look for them later. This code is
@@ -4267,44 +4275,17 @@ bool CombinerHelper::matchCombineExtractToShuffle(
std::make_pair(RegisterVector.front(), RegisterVector.back());
// We check that they're the same type before running. We can also grow the
- // smaller one to the target size, but there isn't an elegant way to do that
- // until we have a good lowering for G_EXTRACT_SUBVECTOR.
+ // smaller one to the target size, but there isn't an elegant way to do that
+ // until we have a good lowering for G_EXTRACT_SUBVECTOR.
+ // Apparently even if they are the same, they don't necessarily have the same type?
if (MRI.getType(VectorRegisters.first) != MRI.getType(VectorRegisters.second))
return false;
- return RegisterVector.size() <= 2;
-}
-
-void CombinerHelper::applyCombineExtractToShuffle(
- MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
- std::pair<Register, Register> &VectorRegisters) {
- assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-
- const Register FirstRegister = VectorRegisters.first;
- const LLT FirstRegisterType = MRI.getType(FirstRegister);
- const unsigned VectorSize = FirstRegisterType.getNumElements();
- SmallVector<int, 32> ShuffleMask;
- for (auto &Pair : MatchInfo) {
- const Register VectorReg = Pair.first;
- int Idx = Pair.second;
-
- if (VectorReg != VectorRegisters.first) {
- Idx += VectorSize;
- }
- ShuffleMask.emplace_back(Idx);
- }
-
- // We could reuse the same vector register and shuffle them both together
- // but it is nicer for later optimizations to explicitly make it undef.
- const GBuildVector *BuildVector = cast<GBuildVector>(&MI);
- Register SecondRegister = VectorRegisters.second;
- if (FirstRegister == SecondRegister) {
- SecondRegister = Builder.buildUndef(FirstRegisterType).getReg(0);
- }
+ if (RegisterVector.size() > 2)
+ return false;
- Builder.buildShuffleVector(BuildVector->getOperand(0), FirstRegister,
- SecondRegister, ShuffleMask);
- MI.eraseFromParent();
+ return matchVectorMaskSequence(MI, Ops, MI.getOperand(0).getReg(),
+ VectorRegisters, VectorMask);
}
bool CombinerHelper::matchExtractAllEltsFromBuildVector(
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 3abb334bb5b813..a231bc8f29ef66 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -152,248 +152,196 @@ body: |
...
---
-name: reverse_concat_buildvector_shuffle
+name: concat_buildvector_shuffle
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle
+ ; CHECK-LABEL: name: concat_buildvector_shuffle
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 7, 6, 5, 4)
- ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<8 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(s64) = G_CONSTANT i64 0
%3:_(s64) = G_CONSTANT i64 1
%4:_(s64) = G_CONSTANT i64 2
%5:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
- %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
- %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
- %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
- %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
%18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
RET_ReallyLR implicit %18
...
---
-name: reverse_interweave_buildvector_shuffle
+name: interweave_buildvector_shuffle
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_interweave_buildvector_shuffle
+ ; CHECK-LABEL: name: interweave_buildvector_shuffle
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4, 7, 2, 5, 0)
- ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<8 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(s64) = G_CONSTANT i64 0
%3:_(s64) = G_CONSTANT i64 1
%4:_(s64) = G_CONSTANT i64 2
%5:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
- %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
- %15:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
- %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
- %17:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
- %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %15:_, %12:_, %17:_, %14:_, %11:_, %16:_, %13:_
RET_ReallyLR implicit %18
...
---
-name: reverse_interweave_same_size_as_dest_buildvector_shuffle
+name: same_size_as_dest_buildvector_shuffle
tracksRegLiveness: true
body: |
bb.1:
- liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_interweave_same_size_as_dest_buildvector_shuffle
- ; CHECK: liveins: $q0, $q1
+ liveins: $q0
+ ; CHECK-LABEL: name: same_size_as_dest_buildvector_shuffle
+ ; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4)
- ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[COPY]](<4 x s32>)
%0:_(<4 x s32>) = COPY $q0
- %1:_(<4 x s32>) = COPY $q1
- %2:_(s64) = G_CONSTANT i64 0
- %3:_(s64) = G_CONSTANT i64 1
- %4:_(s64) = G_CONSTANT i64 2
- %5:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(s64) = G_CONSTANT i64 1
+ %3:_(s64) = G_CONSTANT i64 2
+ %4:_(s64) = G_CONSTANT i64 3
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
%12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
%14:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
RET_ReallyLR implicit %14
...
---
-name: reverse_interweave_half_size_as_dest_buildvector_shuffle
+name: half_size_as_dest_buildvector_shuffle
tracksRegLiveness: true
body: |
bb.1:
- liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_interweave_half_size_as_dest_buildvector_shuffle
- ; CHECK: liveins: $q0, $q1
+ liveins: $q0
+ ; CHECK-LABEL: name: half_size_as_dest_buildvector_shuffle
+ ; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 4)
- ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<2 x s32>)
- %0:_(<4 x s32>) = COPY $q0
- %1:_(<4 x s32>) = COPY $q1
- %2:_(s64) = G_CONSTANT i64 0
- %3:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
- %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_
- RET_ReallyLR implicit %12
-...
----
-name: reverse_concat_single_buildvector_shuffle
-tracksRegLiveness: true
-body: |
- bb.1:
- liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_concat_single_buildvector_shuffle
- ; CHECK: liveins: $q0, $q1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(3, 1, 0, 2)
- ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(s64) = G_CONSTANT i64 0
%2:_(s64) = G_CONSTANT i64 1
- %3:_(s64) = G_CONSTANT i64 2
- %4:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64)
%11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
- RET_ReallyLR implicit %18
-...
----
-name: reverse_concat_double_buildvector_shuffle
-tracksRegLiveness: true
-body: |
- bb.1:
- liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_concat_double_buildvector_shuffle
- ; CHECK: liveins: $q0, $q1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 6, 4, 5, 7, 1, 0, 2, 0, 5, 4, 1, 7)
- ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<16 x s32>)
- %0:_(<4 x s32>) = COPY $q0
- %1:_(<4 x s32>) = COPY $q1
- %2:_(s64) = G_CONSTANT i64 0
- %3:_(s64) = G_CONSTANT i64 1
- %4:_(s64) = G_CONSTANT i64 2
- %5:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
- %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
- %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
- %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
- %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
- %18:_(<16 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_, %12:_, %13:_, %11:_, %13:_, %16:_, %15:_, %12:_, %17:_
- RET_ReallyLR implicit %18
+ %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_
+ RET_ReallyLR implicit %12
...
---
-name: reverse_concat_buildvector_shuffle_three_sources
+name: concat_buildvector_shuffle_four_sources
tracksRegLiveness: true
body: |
bb.1:
- liveins: $q0, $q1, $q2
- ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_three_sources
- ; CHECK: liveins: $q0, $q1, $q2
+ liveins: $d0, $d1, $d2, $d3
+ ; CHECK-LABEL: name: concat_buildvector_shuffle_four_sources
+ ; CHECK: liveins: $d0, $d1, $d2, $d3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
- ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64)
- ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC1]](s32), [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC1]](s32)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[DEF]](s32), [[EVEC1]](s32), [[DEF]](s32), [[EVEC2]](s32), [[DEF]](s32), [[EVEC3]](s32), [[DEF]](s32)
; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
- %0:_(<4 x s32>) = COPY $q0
- %1:_(<4 x s32>) = COPY $q1
- %2:_(<4 x s32>) = COPY $q2
- %3:_(s64) = G_CONSTANT i64 1
- %4:_(s64) = G_CONSTANT i64 2
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
- %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
- %15:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %4:_(s64)
- %16:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %3:_(s64)
- %18:_(<8 x s32>) = G_BUILD_VECTOR %12:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %12:_
- RET_ReallyLR implicit %18
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<2 x s32>) = COPY $d2
+ %3:_(<2 x s32>) = COPY $d3
+ %4:_(s64) = G_CONSTANT i64 1
+ %5:_(s64) = G_CONSTANT i64 2
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<2 x s32>), %4:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<2 x s32>), %5:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %4:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %5:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<2 x s32>), %4:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<2 x s32>), %5:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s32>), %4:_(s64)
+ %18:_(s32) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s32>), %5:_(s64)
+ %19:_(<8 x s32>) = G_BUILD_VECTOR %11:_(s32), %12:_, %13:_, %14:_, %15:_, %16:_, %17:_, %18:_
+ RET_ReallyLR implicit %19
...
---
-name: reverse_concat_buildvector_shuffle_different_element_size
+name: concat_buildvector_shuffle_different_element_size
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0, $d0
- ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_element_size
+ ; CHECK-LABEL: name: concat_buildvector_shuffle_different_element_size
; CHECK: liveins: $q0, $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
- ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
- ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[DEF]](s32), [[DEF]](s32), [[EVEC1]](s32), [[EVEC2]](s32)
- ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<6 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<2 x s32>) = COPY $d0
%2:_(s64) = G_CONSTANT i64 0
%3:_(s64) = G_CONSTANT i64 1
%4:_(s64) = G_CONSTANT i64 2
%5:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
- %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %5:_(s64)
- %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %4:_(s64)
- %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %11:_, %12:_
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %2:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %3:_(s64)
+ %18:_(<6 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_
RET_ReallyLR implicit %18
...
---
-name: reverse_concat_buildvector_shuffle_different_type
+name: concat_buildvector_shuffle_different_type
tracksRegLiveness: true
body: |
bb.1:
liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_type
+ ; CHECK-LABEL: name: concat_buildvector_shuffle_different_type
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
@@ -402,14 +350,14 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
- ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
- ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
- ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C3]](s64)
- ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C2]](s64)
- ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C3]](s64)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32)
; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
%0:_(<4 x s32>) = COPY $q0
@@ -418,24 +366,24 @@ body: |
%3:_(s64) = G_CONSTANT i64 1
%4:_(s64) = G_CONSTANT i64 2
%5:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
- %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %5:_(s64)
- %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %4:_(s64)
- %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %3:_(s64)
- %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %2:_(s64)
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %2:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %3:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %4:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %5:_(s64)
%18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
RET_ReallyLR implicit %18
...
---
-name: reverse_concat_buildvector_shuffle_non_constant_id
+name: concat_buildvector_shuffle_non_constant_id
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $q0, $q1
- ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_non_constant_id
+ ; CHECK-LABEL: name: concat_buildvector_shuffle_non_constant_id
; CHECK: liveins: $d0, $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
@@ -454,26 +402,25 @@ body: |
RET_ReallyLR implicit %12
...
---
-name: reverse_concat_buildvector_shuffle_other_sources
+name: concat_buildvector_shuffle_other_sources
tracksRegLiveness: true
body: |
bb.1:
- liveins: $q0, $q1
- ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_other_sources
- ; CHECK: liveins: $q0, $q1
+ liveins: $q0
+ ; CHECK-LABEL: name: concat_buildvector_shuffle_other_sources
+ ; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
- ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[C2]](s32), [[EVEC2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[C3]](s32), [[EVEC2]](s32)
; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s32>) = COPY $q0
- %1:_(<4 x s32>) = COPY $q1
%2:_(s64) = G_CONSTANT i64 0
%3:_(s64) = G_CONSTANT i64 1
%4:_(s64) = G_CONSTANT i64 2
@@ -481,7 +428,7 @@ body: |
%10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
%11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
%12:_(s32) = G_CONSTANT i32 42
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
%18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
RET_ReallyLR implicit %18
...
@@ -500,14 +447,14 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
- ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
- ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
- ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C3]](s64)
- ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C2]](s64)
- ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
- ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C2]](s64)
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C3]](s64)
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32)
; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR_TRUNC]](<8 x s16>)
%0:_(<4 x s32>) = COPY $q0
@@ -516,14 +463,14 @@ body: |
%3:_(s64) = G_CONSTANT i64 1
%4:_(s64) = G_CONSTANT i64 2
%5:_(s64) = G_CONSTANT i64 3
- %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
- %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
- %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
- %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
- %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
- %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
- %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
- %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+ %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+ %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+ %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+ %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+ %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+ %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+ %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
%18:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
RET_ReallyLR implicit %18
...
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index 07cb5379a075c2..98bd794b2d97e3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1351,10 +1351,30 @@ define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
}
define <8 x i8> @getl(<16 x i8> %x) #0 {
-; CHECK-LABEL: getl:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: getl:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: getl:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-GI-NEXT: mov b3, v0.b[2]
+; CHECK-GI-NEXT: mov v1.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov b2, v0.b[3]
+; CHECK-GI-NEXT: mov v1.b[2], v3.b[0]
+; CHECK-GI-NEXT: mov b3, v0.b[4]
+; CHECK-GI-NEXT: mov v1.b[3], v2.b[0]
+; CHECK-GI-NEXT: mov b2, v0.b[5]
+; CHECK-GI-NEXT: mov v1.b[4], v3.b[0]
+; CHECK-GI-NEXT: mov b3, v0.b[6]
+; CHECK-GI-NEXT: mov b0, v0.b[7]
+; CHECK-GI-NEXT: mov v1.b[5], v2.b[0]
+; CHECK-GI-NEXT: mov v1.b[6], v3.b[0]
+; CHECK-GI-NEXT: mov v1.b[7], v0.b[0]
+; CHECK-GI-NEXT: fmov d0, d1
+; CHECK-GI-NEXT: ret
%vecext = extractelement <16 x i8> %x, i32 0
%vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <16 x i8> %x, i32 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index a728836fb05585..f548a0e01feee6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -397,13 +397,10 @@ define void @test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp
;
; CHECK-GI-LABEL: test_vrev64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI27_0
; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI27_0]
-; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-GI-NEXT: mov h1, v0[1]
-; CHECK-GI-NEXT: str h0, [x1]
-; CHECK-GI-NEXT: str h1, [x1, #2]
+; CHECK-GI-NEXT: add x8, x1, #2
+; CHECK-GI-NEXT: st1.h { v0 }[6], [x1]
+; CHECK-GI-NEXT: st1.h { v0 }[5], [x8]
; CHECK-GI-NEXT: ret
entry:
%tmp2 = load <8 x i16>, ptr %source, align 4