[llvm] [GlobalIsel] Combine G_EXTRACT_VECTOR_ELT (PR #85321)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 31 11:25:38 PDT 2024


https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/85321

>From 260ce2c595dc9352a1df77c659ce262ba2e8ccc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Wed, 13 Mar 2024 10:35:35 +0100
Subject: [PATCH 1/7] [GlobalIsel] Combine G_EXTRACT_VECTOR_ELT

preliminary steps
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   3 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |  33 +++
 .../include/llvm/Target/GlobalISel/Combine.td |   9 +-
 llvm/lib/CodeGen/GlobalISel/CMakeLists.txt    |   1 +
 .../GlobalISel/CombinerHelperVectorOps.cpp    | 174 ++++++++++++++
 .../GlobalISel/combine-extract-vec-elt.mir    | 215 +++++++++++++++++-
 .../CodeGen/AArch64/extract-vector-elt.ll     |  18 +-
 7 files changed, 434 insertions(+), 19 deletions(-)
 create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9e8fc5d635c50a..d2f9d74bf7d61a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -815,6 +815,9 @@ class CombinerHelper {
   /// Combine addos.
   bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Combine extract vector element.
+  bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 261cfcf504d5fe..6727db6988a360 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -739,6 +739,39 @@ class GOr : public GLogicalBinOp {
   };
 };
 
+/// Represents an extract vector element.
+class GExtractVectorElement : public GenericMachineInstr {
+public:
+  Register getVectorReg() const { return getOperand(1).getReg(); }
+  Register getIndexReg() const { return getOperand(2).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT;
+  }
+};
+
+/// Represents an insert vector element.
+class GInsertVectorElement : public GenericMachineInstr {
+public:
+  Register getVectorReg() const { return getOperand(1).getReg(); }
+  Register getElementReg() const { return getOperand(2).getReg(); }
+  Register getIndexReg() const { return getOperand(3).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+  }
+};
+
+/// Represents a freeze.
+class GFreeze : public GenericMachineInstr {
+public:
+  Register getSourceReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_FREEZE;
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 6980cbd04aeb1c..1c71e6b80db051 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1291,6 +1291,12 @@ def match_addos : GICombineRule<
         [{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
 
+def match_extract_of_element : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+        [{ return Helper.matchExtractVectorElement(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 // Combines concat operations
 def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">;
 def combine_concat_vector : GICombineRule<
@@ -1374,7 +1380,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos]>;
+    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+    match_extract_of_element]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index 46e6c6df5998e5..54ac7f72011a6e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMGlobalISel
   GlobalISel.cpp
   Combiner.cpp
   CombinerHelper.cpp
+  CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
   IRTranslator.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
new file mode 100644
index 00000000000000..f1b42ed549636a
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -0,0 +1,174 @@
+//===- CombinerHelperVectorOps.cpp-----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include <optional>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
+                                               BuildFnTy &MatchInfo) {
+
+  GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI);
+
+  Register Dst = Extract->getReg(0);
+  Register Vector = Extract->getVectorReg();
+  Register Index = Extract->getIndexReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT VectorTy = MRI.getType(Vector);
+
+  // The vector register can be def'd by various ops that
+  // have vector as its type. They can all be used for
+  // constant folding, scalarizing, canonicalization, or
+  // combining based on symmetry.
+  //
+  // vector like ops
+  // * build vector
+  // * build vector trunc
+  // * shuffle vector
+  // * splat vector
+  // * concat vectors
+  // * insert/extract vector element
+  // * insert/extract subvector
+  // * vector loads
+  // * scalable vector loads
+  //
+  // compute like ops
+  // * binary ops
+  // * unary ops
+  //  * exts and truncs
+  //  * casts
+  //  * fneg
+  // * select
+  // * phis
+  // * cmps
+  // * freeze
+  // * bitcast
+  // * undef
+
+  // Fold extractVectorElement(undef, undef) -> undef
+  if ((getOpcodeDef<GImplicitDef>(Vector, MRI) ||
+       getOpcodeDef<GImplicitDef>(Index, MRI)) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+    // If the Vector register is undef, then we cannot extract an element from
+    // it. An undef extract Index can be arbitrarily chosen to be an
+    // out-of-range index value, which would result in the instruction being
+    // poison.
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+    return true;
+  }
+
+  // We try to get the value of the Index register.
+  std::optional<ValueAndVReg> MaybeIndex =
+      getIConstantVRegValWithLookThrough(Index, MRI);
+  std::optional<APInt> IndexC = std::nullopt;
+
+  if (MaybeIndex)
+    IndexC = MaybeIndex->Value;
+
+  // Fold extractVectorElement(Vector, TOOLARGE) -> undef
+  if (IndexC && VectorTy.isFixedVector() &&
+      IndexC->uge(VectorTy.getNumElements()) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+    // For fixed-length vectors, it's invalid to extract out-of-range elements.
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+    return true;
+  }
+
+  // Fold extractVectorElement(freeze(FV), Index) ->
+  //     freeze(extractVectorElement(FV, Index))
+  if (auto *Freeze = getOpcodeDef<GFreeze>(Vector, MRI)) {
+    if (MRI.hasOneNonDBGUse(Freeze->getReg(0)) &&
+        isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}})) {
+      // For G_FREEZE, the input and the output types are identical.
+      // Moving the freeze from the Vector into the front of the extract
+      // preserves the freeze semantics. We check above that
+      // the Index register is not undef.
+      // Furthermore, the Vector register
+      // becomes easier to analyze. A build vector
+      // could have been hidden behind the freeze.
+      MatchInfo = [=](MachineIRBuilder &B) {
+        auto Extract =
+            B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index);
+        B.buildFreeze(Dst, Extract);
+      };
+      return true;
+    }
+  }
+
+  // Fold extractVectorElement(insertVectorElement(_, Value, Index), Index) ->
+  // Value
+  if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+    if (Insert->getIndexReg() == Index) {
+      // There is no one-use check. We have to keep the insert.
+      // We only check for equality of the Index registers.
+      // The combine is independent of their constness.
+      // We try to insert Value and then immediately extract
+      // it from the same Index.
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildCopy(Dst, Insert->getElementReg());
+      };
+      return true;
+    }
+  }
+
+  // Fold extractVectorElement(insertVectorElement(Vector, _, C1), C2),
+  // where C1 != C2
+  // -> extractVectorElement(Vector, C2)
+  if (IndexC) {
+    if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+      std::optional<ValueAndVReg> MaybeIndex =
+          getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
+      if (MaybeIndex && MaybeIndex->Value != *IndexC) {
+        // There is no one-use check. We have to keep the insert.
+        // When both Index registers are constants and not equal,
+        // we can look into the Vector register of the insert.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index);
+        };
+        return true;
+      }
+    }
+  }
+
+  // Fold extractVectorElement(BuildVector(.., V, ...), IndexOfV) -> V
+  if (IndexC) {
+    if (auto *Build = getOpcodeDef<GBuildVector>(Vector, MRI)) {
+      EVT Ty(getMVTForLLT(VectorTy));
+      if (MRI.hasOneNonDBGUse(Build->getReg(0)) ||
+          getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) {
+        // There is a one-use check. There are more combines on build vectors.
+        // If the Index is constant, then we can extract the element from the
+        // given offset.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.buildCopy(Dst, Build->getSourceReg(IndexC->getLimitedValue()));
+        };
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index a2116ccc767112..37dc33330196a8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -192,8 +192,8 @@ body:             |
 
 ...
 ---
+# This test checks that this combine runs after the insertvec->build_vector combine.
 name:            extract_from_insert
-alignment:       4
 tracksRegLiveness: true
 liveins:
   - { reg: '$x0' }
@@ -203,8 +203,6 @@ frameInfo:
 body:             |
   bb.1:
     liveins: $x0, $x1
-    ; This test checks that this combine runs after the insertvec->build_vector
-    ; combine.
     ; CHECK-LABEL: name: extract_from_insert
     ; CHECK: liveins: $x0, $x1
     ; CHECK-NEXT: {{  $}}
@@ -247,3 +245,214 @@ body:             |
     RET_ReallyLR implicit $x0
 
 ...
+---
+name:            extract_from_vector_undef
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_vector_undef
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = G_IMPLICIT_DEF
+    %idx:_(s32) = G_CONSTANT i32 -2
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_index_undef
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: extract_from_index_undef
+    ; CHECK: %extract:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_IMPLICIT_DEF
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_index_too_large
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_index_too_large
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_CONSTANT i32 3000
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_with_freeze
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_with_freeze
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: %idx:_(s32) = COPY $w1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32)
+    ; CHECK-NEXT: %extract:_(s64) = G_FREEZE [[EVEC]]
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = COPY $w1
+    %fvec:_(<2 x s64>) = G_FREEZE %vec
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_insert_symmetry
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_insert_symmetry
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %element:_(s64) = COPY $x1
+    ; CHECK-NEXT: $x0 = COPY %element(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = COPY $w1
+    %element:_(s64) = COPY $x1
+    %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_insert_with_different_consts
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_insert_with_different_consts
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: %idx2:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx2(s32)
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_CONSTANT i32 0
+    %idx2:_(s32) = G_CONSTANT i32 1
+    %element:_(s64) = COPY $x1
+    %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx2(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_build_vector_non_const
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_build_vector_non_const
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %idx:_(s32) = COPY $w0
+    ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
+    ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = COPY $w0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            extract_from_build_vector_const
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_build_vector_const
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: $x0 = COPY %arg1(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_CONSTANT i32 0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index c5c525a15ad9be..504222e0036e22 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -25,20 +25,9 @@ entry:
 }
 
 define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2i64_undef_vector:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: extract_v2i64_undef_vector:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    sub sp, sp, #16
-; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT:    mov w9, w0
-; CHECK-GI-NEXT:    mov x8, sp
-; CHECK-GI-NEXT:    and x9, x9, #0x1
-; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
-; CHECK-GI-NEXT:    add sp, sp, #16
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: extract_v2i64_undef_vector:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
 entry:
   %d = extractelement <2 x i64> undef, i32 %c
   ret i64 %d
@@ -130,7 +119,6 @@ define i64 @extract_v2i64_extract_of_insert_different_const(<2 x i64> %a, i64 %e
 ;
 ; CHECK-GI-LABEL: extract_v2i64_extract_of_insert_different_const:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[0], x0
 ; CHECK-GI-NEXT:    mov d0, v0.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    ret

>From d2cca65cdc146cef68c898381b5d0aafbee77895 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 15 Mar 2024 07:02:09 +0100
Subject: [PATCH 2/7] address review comments

---
 .../GlobalISel/CombinerHelperVectorOps.cpp    | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index f1b42ed549636a..88d367dfbbdea5 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -30,7 +30,6 @@ using namespace MIPatternMatch;
 
 bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
                                                BuildFnTy &MatchInfo) {
-
   GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI);
 
   Register Dst = Extract->getReg(0);
@@ -39,10 +38,9 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
   LLT DstTy = MRI.getType(Dst);
   LLT VectorTy = MRI.getType(Vector);
 
-  // The vector register can be def'd by various ops that
-  // have vector as its type. They can all be used for
-  // constant folding, scalarizing, canonicalization, or
-  // combining based on symmetry.
+  // The vector register can be def'd by various ops that have vector as its
+  // type. They can all be used for constant folding, scalarizing,
+  // canonicalization, or combining based on symmetry.
   //
   // vector like ops
   // * build vector
@@ -68,9 +66,12 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
   // * bitcast
   // * undef
 
+  // The MIs def'd on the Index and Vector register;
+  MachineInstr *IndexMI = getDefIgnoringCopies(Index, MRI);
+  MachineInstr *VectorMI = getDefIgnoringCopies(Vector, MRI);
+
   // Fold extractVectorElement(undef, undef) -> undef
-  if ((getOpcodeDef<GImplicitDef>(Vector, MRI) ||
-       getOpcodeDef<GImplicitDef>(Index, MRI)) &&
+  if ((isa<GImplicitDef>(VectorMI) || isa<GImplicitDef>(IndexMI)) &&
       isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
     // If the Vector register is undef, then we cannot extract an element from
     // it. An undef extract Index can be arbitrarily chosen to be an
@@ -99,7 +100,7 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
 
   // Fold extractVectorElement(freeze(FV), Index) ->
   //     freeze(extractVectorElement(FV, Index))
-  if (auto *Freeze = getOpcodeDef<GFreeze>(Vector, MRI)) {
+  if (auto *Freeze = dyn_cast<GFreeze>(VectorMI)) {
     if (MRI.hasOneNonDBGUse(Freeze->getReg(0)) &&
         isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}})) {
       // For G_FREEZE, the input and the output types are identical.
@@ -120,7 +121,7 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
 
   // Fold extractVectorElement(insertVectorElement(_, Value, Index), Index) ->
   // Value
-  if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+  if (auto *Insert = dyn_cast<GInsertVectorElement>(VectorMI)) {
     if (Insert->getIndexReg() == Index) {
       // There is no one-use check. We have to keep the insert.
       // We only check for equality of the Index registers.
@@ -138,7 +139,7 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
   // where C1 != C2
   // -> extractVectorElement(Vector, C2)
   if (IndexC) {
-    if (auto *Insert = getOpcodeDef<GInsertVectorElement>(Vector, MRI)) {
+    if (auto *Insert = dyn_cast<GInsertVectorElement>(VectorMI)) {
       std::optional<ValueAndVReg> MaybeIndex =
           getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
       if (MaybeIndex && MaybeIndex->Value != *IndexC) {
@@ -155,7 +156,7 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
 
   // Fold extractVectorElement(BuildVector(.., V, ...), IndexOfV) -> V
   if (IndexC) {
-    if (auto *Build = getOpcodeDef<GBuildVector>(Vector, MRI)) {
+    if (auto *Build = dyn_cast<GBuildVector>(VectorMI)) {
       EVT Ty(getMVTForLLT(VectorTy));
       if (MRI.hasOneNonDBGUse(Build->getReg(0)) ||
           getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) {

>From e4ae6ebf42d54df5da0e8e7ef490b4eb7721c8b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Wed, 20 Mar 2024 12:26:42 +0100
Subject: [PATCH 3/7] fix typo

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index 88d367dfbbdea5..3dd32b59c19c7c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -66,7 +66,7 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
   // * bitcast
   // * undef
 
-  // The MIs def'd on the Index and Vector register;
+  // The MIs def'd on the Index and Vector registers;
   MachineInstr *IndexMI = getDefIgnoringCopies(Index, MRI);
   MachineInstr *VectorMI = getDefIgnoringCopies(Vector, MRI);
 

>From e048acb8be842813983e7f02ed6145fd496c1fa2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Wed, 27 Mar 2024 09:13:06 +0100
Subject: [PATCH 4/7] rework

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  17 ++
 .../include/llvm/Target/GlobalISel/Combine.td |  41 ++-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |   8 +
 .../GlobalISel/CombinerHelperVectorOps.cpp    | 235 +++++++++++++-----
 4 files changed, 230 insertions(+), 71 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index d2f9d74bf7d61a..b597c6229b57a4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -594,6 +594,10 @@ class CombinerHelper {
   /// This variant does not erase \p MI after calling the build function.
   void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Use a function which takes in a MachineIRBuilder to perform a combine.
+  /// It erases the instruction that defines \p MO from the function.
+  void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
+
   bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo);
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
@@ -818,6 +822,19 @@ class CombinerHelper {
   /// Combine extract vector element.
   bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Combine extract vector element with freeze on the vector register.
+  bool matchExtractVectorElementWithFreeze(const MachineOperand &MO,
+                                           BuildFnTy &MatchInfo);
+
+  /// Combine extract vector element with a build vector on the vector register.
+  bool matchExtractVectorElementWithBuildVector(const MachineOperand &MO,
+                                                BuildFnTy &MatchInfo);
+
+  /// Combine extract vector element with an insert vector element on the vector
+  /// register and different indices.
+  bool matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO,
+                                                     BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 1c71e6b80db051..976e7140c843db 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1297,6 +1297,33 @@ def match_extract_of_element : GICombineRule<
         [{ return Helper.matchExtractVectorElement(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
 
+def extract_vector_element_not_const : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_INSERT_VECTOR_ELT $src, $x, $value, $idx),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx)),
+   (apply (GIReplaceReg $root, $value))>;
+
+def extract_vector_element_different_indices : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_INSERT_VECTOR_ELT $src, $x, $value, $idx2),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx1),
+   [{ return Helper.matchExtractVectorElementWithDifferentIndices(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_freeze : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_FREEZE $src, $input),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithFreeze(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
 // Combines concat operations
 def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">;
 def combine_concat_vector : GICombineRule<
@@ -1305,6 +1332,15 @@ def combine_concat_vector : GICombineRule<
         [{ return Helper.matchCombineConcatVectors(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyCombineConcatVectors(*${root}, ${matchinfo}); }])>;
 
+// match_extract_of_element must be the first!
+def vector_ops_combines: GICombineGroup<[
+match_extract_of_element,
+extract_vector_element_not_const,
+extract_vector_element_different_indices,
+extract_vector_element_build_vector,
+extract_vector_element_freeze
+]>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1362,7 +1398,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
 
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     extract_vec_elt_combines, combines_for_extload, combine_extracted_vector_load,
-    undef_combines, identity_combines, phi_combines,
+    undef_combines, identity_combines, phi_combines, vector_ops_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
     reassocs, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
@@ -1380,8 +1416,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
-    match_extract_of_element]>;
+    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2a521b6b068af7..54a3efa33a2370 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4100,6 +4100,14 @@ void CombinerHelper::applyBuildFn(
   MI.eraseFromParent();
 }
 
+void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
+                                    BuildFnTy &MatchInfo) {
+  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+  Builder.setInstrAndDebugLoc(*Root);
+  MatchInfo(Builder);
+  Root->eraseFromParent();
+}
+
 void CombinerHelper::applyBuildFnNoErase(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   Builder.setInstrAndDebugLoc(MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index 3dd32b59c19c7c..d97b2df7b5d2c8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
@@ -98,78 +99,176 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
     return true;
   }
 
-  // Fold extractVectorElement(freeze(FV), Index) ->
-  //     freeze(extractVectorElement(FV, Index))
-  if (auto *Freeze = dyn_cast<GFreeze>(VectorMI)) {
-    if (MRI.hasOneNonDBGUse(Freeze->getReg(0)) &&
-        isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}})) {
-      // For G_FREEZE, the input and the output types are identical.
-      // Moving the freeze from the Vector into the front of the extract
-      // preserves the freeze semantics. We check above that
-      // the Index register is not undef.
-      // Furthermore, the Vector register
-      // becomes easier to analyze. A build vector
-      // could have been hidden behind the freeze.
-      MatchInfo = [=](MachineIRBuilder &B) {
-        auto Extract =
-            B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index);
-        B.buildFreeze(Dst, Extract);
-      };
-      return true;
-    }
-  }
+  return false;
+}
 
-  // Fold extractVectorElement(insertVectorElement(_, Value, Index), Index) ->
-  // Value
-  if (auto *Insert = dyn_cast<GInsertVectorElement>(VectorMI)) {
-    if (Insert->getIndexReg() == Index) {
-      // There is no one-use check. We have to keep the insert.
-      // We only check for equality of the Index registers.
-      // The combine is independent of their constness.
-      // We try to insert Value and then immediately extract
-      // it from the same Index.
-      MatchInfo = [=](MachineIRBuilder &B) {
-        B.buildCopy(Dst, Insert->getElementReg());
-      };
-      return true;
-    }
-  }
+bool CombinerHelper::matchExtractVectorElementWithDifferentIndices(
+    const MachineOperand &MO, BuildFnTy &MatchInfo) {
+  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+  GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
 
-  // Fold extractVectorElement(insertVectorElement(Vector, _, C1), C2),
-  // where C1 != C2
-  // -> extractVectorElement(Vector, C2)
-  if (IndexC) {
-    if (auto *Insert = dyn_cast<GInsertVectorElement>(VectorMI)) {
-      std::optional<ValueAndVReg> MaybeIndex =
-          getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
-      if (MaybeIndex && MaybeIndex->Value != *IndexC) {
-        // There is no one-use check. We have to keep the insert.
-        // When both Index registers are constants and not equal,
-        // we can look into the Vector register of the insert.
-        MatchInfo = [=](MachineIRBuilder &B) {
-          B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index);
-        };
-        return true;
-      }
-    }
-  }
+  //
+  //  %idx1:_(s64) = G_CONSTANT i64 1
+  //  %idx2:_(s64) = G_CONSTANT i64 2
+  //  %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), %value(s32),
+  //                                             %idx2(s64)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %insert(<2 x s32>), %idx1(s64)
+  //
+  //  -->
+  //
+  //  %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), %value(s32),
+  //                                             %idx2(s64)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx1(s64)
+  //
+  //
+
+  Register Index = Extract->getIndexReg();
+
+  // We try to get the value of the Index register.
+  std::optional<ValueAndVReg> MaybeIndex =
+      getIConstantVRegValWithLookThrough(Index, MRI);
+  std::optional<APInt> IndexC = std::nullopt;
+
+  if (!MaybeIndex)
+    return false;
+  else
+    IndexC = MaybeIndex->Value;
 
-  // Fold extractVectorElement(BuildVector(.., V, ...), IndexOfV) -> V
-  if (IndexC) {
-    if (auto *Build = dyn_cast<GBuildVector>(VectorMI)) {
-      EVT Ty(getMVTForLLT(VectorTy));
-      if (MRI.hasOneNonDBGUse(Build->getReg(0)) ||
-          getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) {
-        // There is a one-use check. There are more combines on build vectors.
-        // If the Index is constant, then we can extract the element from the
-        // given offset.
-        MatchInfo = [=](MachineIRBuilder &B) {
-          B.buildCopy(Dst, Build->getSourceReg(IndexC->getLimitedValue()));
-        };
-        return true;
-      }
-    }
+  Register Vector = Extract->getVectorReg();
+
+  GInsertVectorElement *Insert =
+      getOpcodeDef<GInsertVectorElement>(Vector, MRI);
+  if (!Insert)
+    return false;
+
+  Register Dst = Extract->getReg(0);
+
+  std::optional<ValueAndVReg> MaybeInsertIndex =
+      getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI);
+
+  if (MaybeInsertIndex && MaybeInsertIndex->Value != *IndexC) {
+    // There is no one-use check. We have to keep the insert. When both Index
+    // registers are constants and not equal, we can look into the Vector
+    // register of the insert.
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index);
+    };
+    return true;
   }
 
   return false;
 }
+
+bool CombinerHelper::matchExtractVectorElementWithFreeze(
+    const MachineOperand &MO, BuildFnTy &MatchInfo) {
+  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+  GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
+
+  Register Vector = Extract->getVectorReg();
+
+  //
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+  //  %freeze:_(<2 x s32>) = G_FREEZE %bv(<2 x s32>)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %freeze(<2 x s32>), %opaque(s64)
+  //
+  //  -->
+  //
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+  //  %freeze:_(s32) = G_FREEZE %extract(s32)
+  //
+  //
+
+  // For G_FREEZE, the input and the output types are identical. Moving the
+  // freeze from the Vector into the front of the extract preserves the freeze
+  // semantics. The result is still freeze'd. Furthermore, the Vector register
+  // becomes easier to analyze. A build vector could have been hidden behind the
+  // freeze.
+
+  // We expect a freeze on the Vector register.
+  GFreeze *Freeze = getOpcodeDef<GFreeze>(Vector, MRI);
+  if (!Freeze)
+    return false;
+
+  Register Dst = Extract->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  // We first have to check for one-use and legality of the freeze.
+  // The type of the extractVectorElement did not change.
+  if (!MRI.hasOneNonDBGUse(Freeze->getReg(0)) ||
+      !isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}}))
+    return false;
+
+  Register Index = Extract->getIndexReg();
+
+  // We move the freeze from the Vector register in front of the
+  // extractVectorElement.
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto Extract =
+        B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index);
+    B.buildFreeze(Dst, Extract);
+  };
+
+  return true;
+}
+
+bool CombinerHelper::matchExtractVectorElementWithBuildVector(
+    const MachineOperand &MO, BuildFnTy &MatchInfo) {
+  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+  GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
+
+  // An extract with a constant index from a build vector becomes a copy:
+  //  %zero:_(s64) = G_CONSTANT i64 0
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64)
+  //
+  //  -->
+  //
+  //  %extract:_(s32) = COPY %arg1(s32)
+  //
+  // With a non-constant index, the instructions are left untouched:
+  //
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+  //
+  //  -->
+  //
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+  //
+
+  Register Vector = Extract->getVectorReg();
+
+  // We expect a buildVector on the Vector register.
+  GBuildVector *Build = getOpcodeDef<GBuildVector>(Vector, MRI);
+  if (!Build)
+    return false;
+
+  LLT VectorTy = MRI.getType(Vector);
+
+  // Fold if the build vector has one use or the target prefers its sources.
+  EVT Ty(getMVTForLLT(VectorTy));
+  if (!MRI.hasOneNonDBGUse(Build->getReg(0)) &&
+      !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
+    return false;
+
+  Register Index = Extract->getIndexReg();
+
+  // If the Index is constant, then we can extract the element from the given
+  // offset.
+  std::optional<ValueAndVReg> MaybeIndex =
+      getIConstantVRegValWithLookThrough(Index, MRI);
+  if (!MaybeIndex)
+    return false;
+
+  // We now know that there is a buildVector def'd on the Vector register and
+  // the index is const. The combine will succeed.
+
+  Register Dst = Extract->getReg(0);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getLimitedValue()));
+  };
+
+  return true;
+}

>From 0b2082b7e1490ea09c631253618605d87d698076 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sun, 31 Mar 2024 16:25:57 +0200
Subject: [PATCH 5/7] add build vector trunc

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  5 ++
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |  8 +++
 .../include/llvm/Target/GlobalISel/Combine.td | 15 ++++-
 .../GlobalISel/CombinerHelperVectorOps.cpp    | 61 +++++++++++++++++++
 .../GlobalISel/combine-extract-vec-elt.mir    | 57 +++++++++++++++++
 5 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index b597c6229b57a4..591b922309966a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -830,6 +830,11 @@ class CombinerHelper {
   bool matchExtractVectorElementWithBuildVector(const MachineOperand &MO,
                                                 BuildFnTy &MatchInfo);
 
+  /// Combine extract vector element with a build vector trunc on the vector
+  /// register.
+  bool matchExtractVectorElementWithBuildVectorTrunc(const MachineOperand &MO,
+                                                     BuildFnTy &MatchInfo);
+
   /// Combine extract vector element with a insert vector element on the vector
   /// register and different indices.
   bool matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6727db6988a360..25e47114e4a39a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -286,6 +286,14 @@ class GBuildVector : public GMergeLikeInstr {
   }
 };
 
+/// Represents a G_BUILD_VECTOR_TRUNC.
+class GBuildVectorTrunc : public GMergeLikeInstr {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR_TRUNC;
+  }
+};
+
 /// Represents a G_PTR_ADD.
 class GPtrAdd : public GenericMachineInstr {
 public:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 976e7140c843db..373129043052c5 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1317,6 +1317,13 @@ def extract_vector_element_build_vector : GICombineRule<
    [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
    (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
 
+def extract_vector_element_build_vector_trunc : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
 def extract_vector_element_freeze : GICombineRule<
    (defs root:$root, build_fn_matchinfo:$matchinfo),
    (match (G_FREEZE $src, $input),
@@ -1338,6 +1345,7 @@ match_extract_of_element,
 extract_vector_element_not_const,
 extract_vector_element_different_indices,
 extract_vector_element_build_vector,
+extract_vector_element_build_vector_trunc,
 extract_vector_element_freeze
 ]>;
 
@@ -1396,9 +1404,10 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
 def constant_fold_binops : GICombineGroup<[constant_fold_binop,
                                            constant_fold_fp_binop]>;
 
-def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
-    extract_vec_elt_combines, combines_for_extload, combine_extracted_vector_load,
-    undef_combines, identity_combines, phi_combines, vector_ops_combines,
+def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines,
+    insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
+    combine_extracted_vector_load,
+    undef_combines, identity_combines, phi_combines,
     simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
     reassocs, ptr_add_immed_chain,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index d97b2df7b5d2c8..2b7c6cd6471646 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -272,3 +272,64 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVector(
 
   return true;
 }
+
+bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc(
+    const MachineOperand &MO, BuildFnTy &MatchInfo) {
+  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
+  GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
+
+  //
+  //  %zero:_(s64) = G_CONSTANT i64 0
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64)
+  //
+  //  -->
+  //
+  //  %extract:_(s32) = G_TRUNC %arg1(s64)
+  //
+  //
+  //
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+  //
+  //  -->
+  //
+  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
+  //
+
+  Register Vector = Extract->getVectorReg();
+
+  // We expect a buildVectorTrunc on the Vector register.
+  GBuildVectorTrunc *Build = getOpcodeDef<GBuildVectorTrunc>(Vector, MRI);
+  if (!Build)
+    return false;
+
+  LLT VectorTy = MRI.getType(Vector);
+
+  // There is a one-use check. There are more combines on build vectors.
+  EVT Ty(getMVTForLLT(VectorTy));
+  if (!MRI.hasOneNonDBGUse(Build->getReg(0)) ||
+      !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
+    return false;
+
+  Register Index = Extract->getIndexReg();
+
+  // If the Index is constant, then we can extract the element from the given
+  // offset.
+  std::optional<ValueAndVReg> MaybeIndex =
+      getIConstantVRegValWithLookThrough(Index, MRI);
+  if (!MaybeIndex)
+    return false;
+
+  // We now know that there is a buildVectorTrunc def'd on the Vector register
+  // and the index is const. The combine will succeed.
+
+  Register Dst = Extract->getReg(0);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getLimitedValue()));
+  };
+
+  return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 37dc33330196a8..77ab63e9582983 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -456,3 +456,60 @@ body:             |
 
 ...
 ---
+name:            extract_from_build_vector_trunc_const2
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_build_vector_trunc_const2
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %extract:_(s32) = G_TRUNC %arg1(s64)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %idx:_(s32) = G_CONSTANT i32 0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32)
+    $w0 = COPY %extract(s32)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            extract_from_build_vector_trunc2
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_build_vector_trunc2
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %arg2:_(s64) = COPY $x1
+    ; CHECK-NEXT: %idx:_(s32) = COPY $w0
+    ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+    ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32)
+    ; CHECK-NEXT: $w0 = COPY %extract(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %vec:_(<2 x s64>) = COPY $q0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x1
+    %idx:_(s32) = COPY $w0
+    %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32)
+    $w0 = COPY %extract(s32)
+    RET_ReallyLR implicit $x0
+...
+---

>From 38e1e622b42a61271df177a2c46a8ecb707ea243 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sun, 31 Mar 2024 18:54:57 +0200
Subject: [PATCH 6/7] improve build vectors

---
 .../include/llvm/Target/GlobalISel/Combine.td | 171 +++++++++++++++++-
 .../GlobalISel/CombinerHelperVectorOps.cpp    |  17 +-
 .../GlobalISel/combine-extract-vec-elt.mir    |  35 +++-
 3 files changed, 208 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 373129043052c5..623f165db6dddb 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1310,16 +1310,156 @@ def extract_vector_element_different_indices : GICombineRule<
    [{ return Helper.matchExtractVectorElementWithDifferentIndices(${root}, ${matchinfo}); }]),
    (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
 
-def extract_vector_element_build_vector : GICombineRule<
+def extract_vector_element_build_vector2 : GICombineRule<
    (defs root:$root, build_fn_matchinfo:$matchinfo),
-   (match (G_BUILD_VECTOR $src, $x),
+   (match (G_BUILD_VECTOR $src, $x, $y),
           (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
    [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
    (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
 
-def extract_vector_element_build_vector_trunc : GICombineRule<
+def extract_vector_element_build_vector3 : GICombineRule<
    (defs root:$root, build_fn_matchinfo:$matchinfo),
-   (match (G_BUILD_VECTOR_TRUNC $src, $x),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector4 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector5 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector6 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector7 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector8 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector9 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector10 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector11 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector12 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector13 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector14 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector15 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector16 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector_trunc2 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x, $y),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector_trunc3 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector_trunc4 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector_trunc5 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector_trunc6 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b, $c),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector_trunc7 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b, $c, $d),
+          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
+   [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
+
+def extract_vector_element_build_vector_trunc8 : GICombineRule<
+   (defs root:$root, build_fn_matchinfo:$matchinfo),
+   (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b, $c, $d, $e),
           (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
    [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
    (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
@@ -1344,8 +1484,28 @@ def vector_ops_combines: GICombineGroup<[
 match_extract_of_element,
 extract_vector_element_not_const,
 extract_vector_element_different_indices,
-extract_vector_element_build_vector,
-extract_vector_element_build_vector_trunc,
+extract_vector_element_build_vector2,
+extract_vector_element_build_vector3,
+extract_vector_element_build_vector4,
+extract_vector_element_build_vector5,
+extract_vector_element_build_vector6,
+extract_vector_element_build_vector7,
+extract_vector_element_build_vector8,
+extract_vector_element_build_vector9,
+extract_vector_element_build_vector10,
+extract_vector_element_build_vector11,
+extract_vector_element_build_vector12,
+extract_vector_element_build_vector13,
+extract_vector_element_build_vector14,
+extract_vector_element_build_vector15,
+extract_vector_element_build_vector16,
+extract_vector_element_build_vector_trunc2,
+extract_vector_element_build_vector_trunc3,
+extract_vector_element_build_vector_trunc4,
+extract_vector_element_build_vector_trunc5,
+extract_vector_element_build_vector_trunc6,
+extract_vector_element_build_vector_trunc7,
+extract_vector_element_build_vector_trunc8,
 extract_vector_element_freeze
 ]>;
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index 2b7c6cd6471646..be7db25f1005c4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -92,7 +92,7 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI,
 
   // Fold extractVectorElement(Vector, TOOLARGE) -> undef
   if (IndexC && VectorTy.isFixedVector() &&
-      IndexC->uge(VectorTy.getNumElements()) &&
+      IndexC->getZExtValue() >= VectorTy.getNumElements() &&
       isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
     // For fixed-length vectors, it's invalid to extract out-of-range elements.
     MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
@@ -267,7 +267,7 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVector(
   Register Dst = Extract->getReg(0);
 
   MatchInfo = [=](MachineIRBuilder &B) {
-    B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getLimitedValue()));
+    B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue()));
   };
 
   return true;
@@ -309,8 +309,9 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc(
 
   // There is a one-use check. There are more combines on build vectors.
   EVT Ty(getMVTForLLT(VectorTy));
-  if (!MRI.hasOneNonDBGUse(Build->getReg(0)) ||
-      !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
+  if (!MRI.hasOneNonDBGUse(
+          Build->getReg(0) ||
+          !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)))
     return false;
 
   Register Index = Extract->getIndexReg();
@@ -326,9 +327,15 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc(
   // and the index is const. The combine will succeed.
 
   Register Dst = Extract->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Build->getSourceReg(0));
+
+  // For buildVectorTrunc, the inputs are truncated.
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}}))
+    return false;
 
   MatchInfo = [=](MachineIRBuilder &B) {
-    B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getLimitedValue()));
+    B.buildTrunc(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue()));
   };
 
   return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index 77ab63e9582983..c2a38e26676cf9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -474,11 +474,13 @@ body:             |
     ; CHECK-NEXT: $w0 = COPY %extract(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %vec:_(<2 x s64>) = COPY $q0
-    %idx:_(s32) = G_CONSTANT i32 0
     %arg1:_(s64) = COPY $x0
     %arg2:_(s64) = COPY $x1
-    %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64)
-    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32)
+    %arg3:_(s64) = COPY $x0
+    %arg4:_(s64) = COPY $x1
+    %idx:_(s32) = G_CONSTANT i32 0
+    %bv:_(<4 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64), %arg3(s64), %arg4(s64)
+    %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %idx(s32)
     $w0 = COPY %extract(s32)
     RET_ReallyLR implicit $x0
 ...
@@ -503,7 +505,6 @@ body:             |
     ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32)
     ; CHECK-NEXT: $w0 = COPY %extract(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
-    %vec:_(<2 x s64>) = COPY $q0
     %arg1:_(s64) = COPY $x0
     %arg2:_(s64) = COPY $x1
     %idx:_(s32) = COPY $w0
@@ -513,3 +514,29 @@ body:             |
     RET_ReallyLR implicit $x0
 ...
 ---
+name:            extract_from_build_vector_trunc_const3
+alignment:       4
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: extract_from_build_vector_trunc_const3
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %arg1:_(s128) = COPY $q0
+    ; CHECK-NEXT: %extract:_(s64) = G_TRUNC %arg1(s128)
+    ; CHECK-NEXT: $x0 = COPY %extract(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %arg1:_(s128) = COPY $q0
+    %arg2:_(s128) = COPY $q1
+    %idx:_(s32) = G_CONSTANT i32 0
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR_TRUNC %arg1(s128), %arg2(s128)
+    %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32)
+    $x0 = COPY %extract(s64)
+    RET_ReallyLR implicit $x0
+...
+---

>From 415106860196990376a3d33f8f6b982bc3370389 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sun, 31 Mar 2024 20:25:01 +0200
Subject: [PATCH 7/7] fix test

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp | 5 ++---
 llvm/test/CodeGen/AArch64/extractvector-oob-load.mir    | 7 ++-----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index be7db25f1005c4..f9240453da49bb 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -309,9 +309,8 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc(
 
   // There is a one-use check. There are more combines on build vectors.
   EVT Ty(getMVTForLLT(VectorTy));
-  if (!MRI.hasOneNonDBGUse(
-          Build->getReg(0) ||
-          !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)))
+  if (!MRI.hasOneNonDBGUse(Build->getReg(0)) ||
+      !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
     return false;
 
   Register Index = Extract->getIndexReg();
diff --git a/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir b/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir
index e8c5819e75e090..e7e8c939910941 100644
--- a/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir
+++ b/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir
@@ -22,11 +22,8 @@ body:             |
     ; CHECK-LABEL: name: f
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64))
-    ; CHECK-NEXT: $x0 = COPY [[LOAD]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: $x0 = COPY [[DEF]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(p0) = COPY $x0
     %3:_(s64) = G_CONSTANT i64 224567957



More information about the llvm-commits mailing list