[llvm] [AArch64][GlobalISel] Combine G_EXTRACT_VECTOR_ELT and G_BUILD_VECTOR sequences into G_SHUFFLE_VECTOR (PR #110545)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 30 11:00:46 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: Valentijn (ValentijnvdBeek)

<details>
<summary>Changes</summary>

This combine tries to find all the build vectors whose source elements all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors. One example where this may happen is for AI chips where there are a lot of matrix multiplications. Typically these vectors are dissected and then rearranged into the desired layout.

E.g.
```
  %donor1(<2 x s32>) = COPY $d0
  %donor2(<2 x s32>) = COPY $d1
  %ext1 = G_EXTRACT_VECTOR_ELT %donor1, 0
  %ext2 = G_EXTRACT_VECTOR_ELT %donor1, 1
  %ext3 = G_EXTRACT_VECTOR_ELT %donor2, 0
  %ext4 = G_EXTRACT_VECTOR_ELT %donor2, 1
  %vector = G_BUILD_VECTOR %ext1, %ext2, %ext3, %ext4
  ==>
  replace with:  %vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
```

It yields some speedups on the AArch64 architecture. This pattern can also occur on other architectures where a lot of vector operations may happen. It can be tempting to express matrix operations in a similar fashion rather than directly as one cohesive shufflevector.

---

Patch is 29.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110545.diff


7 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+9) 
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+12-1) 
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+93) 
- (modified) llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp (-2) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir (+302) 
- (modified) llvm/test/CodeGen/AArch64/arm64-neon-copy.ll (+16-88) 
- (modified) llvm/test/CodeGen/AArch64/arm64-rev.ll (+6-3) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 76d51ab819f441..15b0aaa5cf9d03 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -580,6 +580,15 @@ class CombinerHelper {
   bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
   void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
 
+  /// Combine a build vector whose elements are all extracted from one or two
+  /// source vectors into a shuffle vector.
+  bool matchCombineExtractToShuffle(
+      MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+      std::pair<Register, Register> &VectorRegisters);
+  void applyCombineExtractToShuffle(
+      MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+      std::pair<Register, Register> &VectorRegisters);
+
   bool matchExtractAllEltsFromBuildVector(
       MachineInstr &MI,
       SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f838c6e62a2ce3..0525bfe1b0ddb2 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -905,6 +905,16 @@ def extract_vec_elt_build_vec : GICombineRule<
     [{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
 
+def extract_vector_register_to_id_mapping_matchinfo :
+  GIDefMatchData<"SmallVector<std::pair<Register, int>>">;
+def vector_reg_pair_matchinfo :
+  GIDefMatchData<"std::pair<Register, Register>">;
+def extract_vector_element_build_vector_to_shuffle_vector : GICombineRule<
+  (defs root:$root, extract_vector_register_to_id_mapping_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+    [{ return Helper.matchCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }]),
+  (apply [{ Helper.applyCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }])>;
+
 // Fold away full elt extracts from a build_vector.
 def extract_all_elts_from_build_vector_matchinfo :
   GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr*>>">;
@@ -916,7 +926,8 @@ def extract_all_elts_from_build_vector : GICombineRule<
 
 def extract_vec_elt_combines : GICombineGroup<[
   extract_vec_elt_build_vec,
-  extract_all_elts_from_build_vector]>;
+  extract_all_elts_from_build_vector,
+  extract_vector_element_build_vector_to_shuffle_vector]>;
 
 def funnel_shift_from_or_shift : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c279289f9161bf..1499caeb37d134 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -38,8 +38,10 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cmath>
+#include <llvm/ADT/SmallVector.h>
 #include <optional>
 #include <tuple>
+#include <utility>
 
 #define DEBUG_TYPE "gi-combiner"
 
@@ -4205,6 +4207,97 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
   replaceSingleDefInstWithReg(MI, Reg);
 }
 
+bool CombinerHelper::matchCombineExtractToShuffle(
+    MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &VecIndexPair,
+    std::pair<Register, Register> &VectorRegisters) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  const GBuildVector *Build = cast<GBuildVector>(&MI);
+  // This combine tries to find all the build vectors whose source elements
+  // all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.
+  // One example where this may happen is for AI chips where there are a lot
+  // of matrix multiplications. Typically these vectors are dissected and then
+  // rearranged into the desired layout.
+  // E.g.
+  //  %donor1(<2 x s32>) = COPY $d0
+  //  %donor2(<2 x s32>) = COPY $d1
+  //  %ext1 = G_EXTRACT_VECTOR_ELT %donor1, 0
+  //  %ext2 = G_EXTRACT_VECTOR_ELT %donor1, 1
+  //  %ext3 = G_EXTRACT_VECTOR_ELT %donor2, 0
+  //  %ext4 = G_EXTRACT_VECTOR_ELT %donor2, 1
+  //  %vector = G_BUILD_VECTOR %ext1, %ext2, %ext3, %ext4
+  // ==>
+  // replace with:
+  //   %vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
+  SmallSetVector<Register, 2> RegisterVector;
+  const unsigned NumElements = Build->getNumSources();
+  for (unsigned Index = 0; Index < NumElements; Index++) {
+    Register SrcReg = peekThroughBitcast(Build->getSourceReg(Index), MRI);
+    auto *ExtractInstr = getOpcodeDef<GExtractVectorElement>(SrcReg, MRI);
+    if (!ExtractInstr)
+      return false;
+
+    // A shuffle mask needs the exact source index of every element, so if any
+    // extract index is not a constant we can't replace the build vector with
+    // a shuffle vector.
+    auto Cst = getIConstantVRegVal(ExtractInstr->getIndexReg(), MRI);
+    if (!Cst)
+      return false;
+    unsigned Idx = Cst->getZExtValue();
+
+    Register VectorReg = ExtractInstr->getVectorReg();
+    RegisterVector.insert(VectorReg);
+    VecIndexPair.emplace_back(std::make_pair(VectorReg, Idx));
+  }
+
+  // Record the first and last donor so the apply step need not look them up.
+  // The pair is only meaningful with at most two donors in the set. Since a
+  // shuffle takes only two source vectors, the size check at the end rejects
+  // any match with more than two anyway.
+  VectorRegisters =
+      std::make_pair(RegisterVector.front(), RegisterVector.back());
+
+  // Both donors must have the same type. We could also grow the smaller one
+  // to the target size, but there isn't an elegant way to do that until we
+  // have a good lowering for G_EXTRACT_SUBVECTOR.
+  if (MRI.getType(VectorRegisters.first) != MRI.getType(VectorRegisters.second))
+    return false;
+
+  return RegisterVector.size() <= 2;
+}
+
+void CombinerHelper::applyCombineExtractToShuffle(
+    MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+    std::pair<Register, Register> &VectorRegisters) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+
+  const Register FirstRegister = VectorRegisters.first;
+  const LLT FirstRegisterType = MRI.getType(FirstRegister);
+  const unsigned VectorSize = FirstRegisterType.getNumElements();
+  SmallVector<int, 32> ShuffleMask;
+  for (auto &Pair : MatchInfo) {
+    const Register VectorReg = Pair.first;
+    int Idx = Pair.second;
+
+    if (VectorReg != VectorRegisters.first) {
+      Idx += VectorSize;
+    }
+    ShuffleMask.emplace_back(Idx);
+  }
+
+  // We could reuse the same vector register for both shuffle operands, but it
+  // is nicer for later optimizations to explicitly make the second one undef.
+  const GBuildVector *BuildVector = cast<GBuildVector>(&MI);
+  Register SecondRegister = VectorRegisters.second;
+  if (FirstRegister == SecondRegister) {
+    SecondRegister = MRI.createGenericVirtualRegister(FirstRegisterType);
+    Builder.buildUndef(SecondRegister);
+  }
+
+  Builder.buildShuffleVector(BuildVector->getOperand(0), FirstRegister,
+                             SecondRegister, ShuffleMask);
+  MI.eraseFromParent();
+}
+
 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
     MachineInstr &MI,
     SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 59f2fc633f5de7..1ddecefa173838 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -772,8 +772,6 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
   LLT DstTy = Res.getLLTTy(*getMRI());
   LLT Src1Ty = Src1.getLLTTy(*getMRI());
   LLT Src2Ty = Src2.getLLTTy(*getMRI());
-  assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
-         Mask.size());
   assert(DstTy.getElementType() == Src1Ty.getElementType() &&
          DstTy.getElementType() == Src2Ty.getElementType());
   (void)DstTy;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 93f6051c3bd3b7..3cc836b9718297 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -151,3 +151,305 @@ body:             |
     RET_ReallyLR implicit $x0
 
 ...
+---
+name: reverse_concat_buildvector_shuffle
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 7, 6, 5, 4)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %5:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+    %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+    %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+    %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+    %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+    %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+    RET_ReallyLR implicit %18
+...
+---
+name: reverse_interweave_buildvector_shuffle
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_interweave_buildvector_shuffle
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4, 7, 2, 5, 0)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %5:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+    %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+    %15:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+    %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+    %17:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+    %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+    RET_ReallyLR implicit %18
+...
+
+---
+name: reverse_interweave_same_size_as_dest_buildvector_shuffle
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_interweave_same_size_as_dest_buildvector_shuffle
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %5:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+    %14:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
+    RET_ReallyLR implicit %14
+...
+---
+name: reverse_interweave_half_size_as_dest_buildvector_shuffle
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_interweave_half_size_as_dest_buildvector_shuffle
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 4)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<2 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+    %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_
+    RET_ReallyLR implicit %12
+...
+---
+name: reverse_concat_single_buildvector_shuffle
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_concat_single_buildvector_shuffle
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(3, 1, 0, 2)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s64) = G_CONSTANT i64 0
+    %2:_(s64) = G_CONSTANT i64 1
+    %3:_(s64) = G_CONSTANT i64 2
+    %4:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
+    RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_double_buildvector_shuffle
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_concat_double_buildvector_shuffle
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 6, 4, 5, 7, 1, 0, 2, 0, 5, 4, 1, 7)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<16 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %5:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+    %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+    %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+    %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+    %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+    %18:_(<16 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_, %12:_, %13:_, %11:_, %13:_, %16:_, %15:_, %12:_, %17:_
+    RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buildvector_shuffle_three_sources
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $q1, $q2
+    ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_three_sources
+    ; CHECK: liveins: $q0, $q1, $q2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC1]](s32), [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC1]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = COPY $q2
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+    %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+    %15:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %4:_(s64)
+    %16:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %3:_(s64)
+    %18:_(<8 x s32>) = G_BUILD_VECTOR %12:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %12:_
+    RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buildvector_shuffle_different_element_size
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $q0, $d0
+    ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_element_size
+    ; CHECK: liveins: $q0, $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
+    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64)
+    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[DEF]](s32), [[DEF]](s32), [[EVEC1]](s32), [[EVEC2]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<2 x s32>) = COPY $d0
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %5:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+    %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %5:_(s64)
+    %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %4:_(s64)
+    %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %11:_, %12:_
+    RET_ReallyLR implicit %18
+...
+---
+name: reverse_concat_buil...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/110545


More information about the llvm-commits mailing list