[llvm] 867e379 - [AArch64][GlobalISel] Match G_SHUFFLE_VECTOR -> insert elt + extract elt

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 22 14:46:31 PST 2021


Author: Jessica Paquette
Date: 2021-02-22T14:44:09-08:00
New Revision: 867e379c0e14527eb7aa68485a10324693e35f5d

URL: https://github.com/llvm/llvm-project/commit/867e379c0e14527eb7aa68485a10324693e35f5d
DIFF: https://github.com/llvm/llvm-project/commit/867e379c0e14527eb7aa68485a10324693e35f5d.diff

LOG: [AArch64][GlobalISel] Match G_SHUFFLE_VECTOR -> insert elt + extract elt

Match a G_SHUFFLE_VECTOR with a mask that allows it to be represented as a
G_INSERT_VECTOR_ELT and a G_EXTRACT_VECTOR_ELT.

This ports `isINSMask` from AArch64ISelLowering and the portion of
`AArch64TargetLowering::LowerVECTOR_SHUFFLE` which handles the equivalent
transformation.
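
An "ins" mask here is a shuffle mask in which every lane but one selects the
element already in place from one of the two inputs (an undef lane counts as a
match for both sides). For example, with two input elements, shufflemask(0, 3)
keeps lane 0 of the LHS and only replaces lane 1 with lane 1 of the RHS, so it
can be rewritten as a single insert into the LHS (see the v2s32_match_left_3
test below).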

This provides more opportunities for matching DUP. We don't yet have all of the
combines needed to actually produce DUP from these, but this is still better
for size than the full TBL expansion of G_SHUFFLE_VECTOR.

This is a -0.1% code size improvement on CTMark/Bullet at -Os.

IR example: https://godbolt.org/z/sdcevT
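
A gMIR sketch of the rewrite, mirroring the v2s32_match_left_0 test below
(the register names here are illustrative):

  %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(0, 0)

becomes

  %idx:_(s64) = G_CONSTANT i64 0
  %elt:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), %idx(s64)
  %lane:_(s64) = G_CONSTANT i64 1
  %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, %elt(s32), %lane(s64)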

Differential Revision: https://reviews.llvm.org/D97214

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuf-to-ins.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 59988af493c2..8f66f003f870 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -79,9 +79,13 @@ def ext: GICombineRule <
   (apply [{ applyEXT(*${root}, ${matchinfo}); }])
 >;
 
-// Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
-// instruction.
-def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
+def shuf_to_ins_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
+def shuf_to_ins: GICombineRule <
+  (defs root:$root, shuf_to_ins_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchINS(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ return applyINS(*${root}, B, ${matchinfo}); }])
+>;
 
 def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
 def vashr_vlshr_imm : GICombineRule<
@@ -100,6 +104,10 @@ def form_duplane : GICombineRule <
   (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
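+// Combines which replace a G_SHUFFLE_VECTOR with a lowered form: either a
+// target-specific pseudo or an equivalent generic instruction sequence.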
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+                                              form_duplane,
+                                              shuf_to_ins]>;
+
 def adjust_icmp_imm_matchdata :
   GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
 def adjust_icmp_imm : GICombineRule <
@@ -132,8 +140,8 @@ def mul_const : GICombineRule<
 // pseudos.
 def AArch64PostLegalizerLoweringHelper
     : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
-                       [shuffle_vector_pseudos, vashr_vlshr_imm,
-                        icmp_lowering, form_duplane]> {
+                       [shuffle_vector_lowering, vashr_vlshr_imm,
+                        icmp_lowering]> {
   let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
 }
 

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index a06ff4b5417a..361961388f2c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -176,6 +176,37 @@ static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
   return true;
 }
 
+/// Helper function for matchINS.
+///
+/// \returns a value when \p M is an ins mask for \p NumInputElements.
+///
+/// First element of the returned pair is true when the produced
+/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
+///
+/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
+static Optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
+                                                int NumInputElements) {
+  if (M.size() != static_cast<size_t>(NumInputElements))
+    return None;
+  int NumLHSMatch = 0, NumRHSMatch = 0;
+  int LastLHSMismatch = -1, LastRHSMismatch = -1;
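+  // For each input, count the lanes that keep the element already in place
+  // (an undef lane counts as a match for both sides), and remember the last
+  // mismatched lane on each side.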
+  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
+    if (M[Idx] == -1) {
+      ++NumLHSMatch;
+      ++NumRHSMatch;
+      continue;
+    }
+    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
+    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
+  }
+  const int NumNeededToMatch = NumInputElements - 1;
+  if (NumLHSMatch == NumNeededToMatch)
+    return std::make_pair(true, LastLHSMismatch);
+  if (NumRHSMatch == NumNeededToMatch)
+    return std::make_pair(false, LastRHSMismatch);
+  return None;
+}
+
 /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
 /// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
 static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -378,6 +409,59 @@ static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
   return true;
 }
 
+/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
+/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
+///
+/// e.g.
+///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
+///
+/// Can be represented as
+///
+///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
+///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
+///
+static bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  Register Dst = MI.getOperand(0).getReg();
+  int NumElts = MRI.getType(Dst).getNumElements();
+  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
+  if (!DstIsLeftAndDstLane)
+    return false;
+  bool DstIsLeft;
+  int DstLane;
+  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
+  Register Left = MI.getOperand(1).getReg();
+  Register Right = MI.getOperand(2).getReg();
+  Register DstVec = DstIsLeft ? Left : Right;
+  Register SrcVec = Left;
+
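+  // The element to insert comes from whichever input lane the mask selects
+  // at the mismatched (destination) lane.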
+  int SrcLane = ShuffleMask[DstLane];
+  if (SrcLane >= NumElts) {
+    SrcVec = Right;
+    SrcLane -= NumElts;
+  }
+
+  auto ScalarTy = MRI.getType(Dst).getElementType();
+  MatchInfo = [=](MachineIRBuilder &MIB) {
+    MIB.buildInsertVectorElement(
+        Dst, DstVec,
+        MIB.buildExtractVectorElement(
+            ScalarTy, SrcVec, MIB.buildConstant(LLT::scalar(64), SrcLane)),
+        MIB.buildConstant(LLT::scalar(64), DstLane));
+  };
+  return true;
+}
+
+static bool applyINS(MachineInstr &MI, MachineIRBuilder &Builder,
+                     std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  Builder.setInstrAndDebugLoc(MI);
+  MatchInfo(Builder);
+  MI.eraseFromParent();
+  return true;
+}
+
 /// isVShiftRImm - Check if this is a valid vector for the immediate
 /// operand of a vector shift right operation. The value must be in the range:
 ///   1 <= Value <= ElementBits for a right shift.

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuf-to-ins.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuf-to-ins.mir
new file mode 100644
index 000000000000..29945fa68527
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuf-to-ins.mir
@@ -0,0 +1,310 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-lowering --aarch64postlegalizerloweringhelper-only-enable-rule="shuf_to_ins" -verify-machineinstrs %s -o - | FileCheck %s
+
+# REQUIRES: asserts
+
+# Check that we can recognize an ins mask for a shuffle vector.
+
+...
+---
+name:            v2s32_match_left_0
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 1, because M[1] != 1.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[DstLane] = 0
+
+    ; CHECK-LABEL: name: v2s32_match_left_0
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(0, 0)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v2s32_match_left_1
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[1] = 1 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 1
+
+    ; CHECK-LABEL: name: v2s32_match_left_1
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(1, 1)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v2s32_match_left_3
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 1, because M[1] != 1.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[1] = 3 - 2 = 1
+
+    ; CHECK-LABEL: name: v2s32_match_left_3
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(0, 3)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v2s32_match_right_3
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[1] = 1 + 2 -> G_INSERT_VECTOR_ELT should use %right.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 2.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 1
+
+    ; CHECK-LABEL: name: v2s32_match_right_3
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %right, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(1, 3)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            v2s32_match_right_2
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 + 2 -> G_INSERT_VECTOR_ELT should use %right.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 1, because M[1] != 3.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[1] = 0
+
+    ; CHECK-LABEL: name: v2s32_match_right_2
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %right, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 0)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            dont_combine_too_many_matches_right
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; 2 elts -> need 1 match.
+    ;
+    ; Matched M[0] = 0 + 2, M[1] = 1 + 2 -> too many matches.
+
+    ; CHECK-LABEL: name: dont_combine_too_many_matches_right
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 3)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 3)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            tiebreaker
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; Matched the correct amount on the left and right.
+    ; Use left as a tiebreaker.
+    ;
+    ; Matched M[1] = 1 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 2 - 2 = 0
+
+    ; CHECK-LABEL: name: tiebreaker
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, 1)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            tiebreaker_undef
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; Undef counts as a match for left and right.
+    ;
+    ; Matched M[1] = -1 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 2 - 2 = 0
+
+    ; CHECK-LABEL: name: tiebreaker_undef
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(2, -1)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            match_left_undef
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; Undef counts as a match for left and right.
+    ;
+    ; Matched M[1] = -1 -> G_INSERT_VECTOR_ELT should use %left.
+    ; DstLane (G_INSERT_VECTOR_ELT) : 0, because M[0] != 0.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[0] = 3 - 2 = 1
+
+    ; CHECK-LABEL: name: match_left_undef
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %left:_(<2 x s32>) = COPY $d0
+    ; CHECK: %right:_(<2 x s32>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %right(<2 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %shuf:_(<2 x s32>) = G_INSERT_VECTOR_ELT %left, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $d0 = COPY %shuf(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %left:_(<2 x s32>) = COPY $d0
+    %right:_(<2 x s32>) = COPY $d1
+    %shuf:_(<2 x s32>) = G_SHUFFLE_VECTOR %left(<2 x s32>), %right, shufflemask(3, -1)
+    $d0 = COPY %shuf(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            match_right_undef
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; Matched M[0] = 0 + 4, undef, undef => 3 matches on the right.
+    ;
+    ; DstLane (G_INSERT_VECTOR_ELT) : 3, because M[3] != 7.
+    ; SrcLane (G_EXTRACT_VECTOR_ELT) : M[3] = 2
+
+    ; CHECK-LABEL: name: match_right_undef
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %left:_(<4 x s32>) = COPY $q0
+    ; CHECK: %right:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT %left(<4 x s32>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK: %shuf:_(<4 x s32>) = G_INSERT_VECTOR_ELT %right, [[EVEC]](s32), [[C1]](s64)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %left:_(<4 x s32>) = COPY $q0
+    %right:_(<4 x s32>) = COPY $q1
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %left(<4 x s32>), %right, shufflemask(4, -1, -1, 2)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0
