[llvm] 8dd34cc - [AArch64][GlobalISel] Select uzp1 and uzp2

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 3 15:10:09 PDT 2020


Author: Jessica Paquette
Date: 2020-06-03T15:09:41-07:00
New Revision: 8dd34cce0716e0d83c2f05375e8352b5fb4c680c

URL: https://github.com/llvm/llvm-project/commit/8dd34cce0716e0d83c2f05375e8352b5fb4c680c
DIFF: https://github.com/llvm/llvm-project/commit/8dd34cce0716e0d83c2f05375e8352b5fb4c680c.diff

LOG: [AArch64][GlobalISel] Select uzp1 and uzp2

Port the shuffle-mask checks for uzp1 and uzp2 from AArch64ISelLowering.

Add two custom opcodes: G_UZP1 and G_UZP2.

Produce them in the post-legalizer combiner when the mask checks out.

Tests:

- postlegalizer-combiner-uzp.mir verifies that we create G_UZP1 and G_UZP2.
The testcases that check that we create them come from neon-perm.ll.

- select-uzp.mir verifies that we can select G_UZP1 and G_UZP2.

Differential Revision: https://reviews.llvm.org/D81049

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-uzp.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/AArch64InstrGISel.td
    llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 0e96a0785e58..21d4450d43a3 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -29,11 +29,24 @@ def zip : GICombineRule<
   (defs root:$root, zip_matchdata:$matchinfo),
   (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
          [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
-  (apply [{ applyZip(*${root}, ${matchinfo}); }])
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
+def uzp_matchdata : GIDefMatchData<"unsigned">;
+def uzp : GICombineRule<
+  (defs root:$root, uzp_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
+>;
+
+// Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
+// instruction.
+def shuffle_vector_pseudos : GICombineGroup<[zip, uzp]>;
+
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
-                       [erase_undef_store, combines_for_extload, zip]> {
+                       [erase_undef_store, combines_for_extload,
+                        shuffle_vector_pseudos]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index e5b9546f859e..650b0eee53c8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -25,6 +25,20 @@ def G_ADD_LOW : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+// Represents an uzp1 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP1 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
+// Represents an uzp2 instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_UZP2 : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2);
+}
+
 // Represents a zip1 instruction. Produced post-legalization from
 // G_SHUFFLE_VECTORs with appropriate masks.
 def G_ZIP1 : AArch64GenericInstruction {
@@ -39,5 +53,7 @@ def G_ZIP2 : AArch64GenericInstruction {
   let InOperandList = (ins type0:$v1, type0:$v2);
 }
 
+def : GINodeEquiv<G_UZP1, AArch64uzp1>;
+def : GINodeEquiv<G_UZP2, AArch64uzp2>;
 def : GINodeEquiv<G_ZIP1, AArch64zip1>;
 def : GINodeEquiv<G_ZIP2, AArch64zip2>;

diff  --git a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
index b53830cfe460..baafe080764b 100644
--- a/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PostLegalizerCombiner.cpp
@@ -28,6 +28,21 @@
 
 using namespace llvm;
 
+/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
+/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
+static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
+                      unsigned &WhichResult) {
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    // Skip undef indices.
+    if (M[i] < 0)
+      continue;
+    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
+      return false;
+  }
+  return true;
+}
+
 /// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
 /// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
 static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
@@ -47,6 +62,23 @@ static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
   return true;
 }
 
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_UZP1 or G_UZP2 instruction.
+///
+/// \param [in] MI - The shuffle vector instruction.
+/// \param [out] Opc - Either G_UZP1 or G_UZP2 on success.
+static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     unsigned &Opc) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  unsigned WhichResult;
+  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+  unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
+    return false;
+  Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+  return true;
+}
+
 static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
                      unsigned &Opc) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
@@ -59,7 +91,9 @@ static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
   return true;
 }
 
-static bool applyZip(MachineInstr &MI, unsigned Opc) {
+/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
+/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
+static bool applyShuffleVectorPseudo(MachineInstr &MI, unsigned Opc) {
   MachineIRBuilder MIRBuilder(MI);
   MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
                         {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir
new file mode 100644
index 000000000000..2717c6e21d41
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir
@@ -0,0 +1,146 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can recognize a shuffle mask for a uzp instruction and produce
+# a G_UZP1 or G_UZP2 where appropriate.
+#
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            uzp1_v4s32
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: uzp1_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 2, 4, 6)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            uzp2_v4s32
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: uzp2_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
+    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
+    $q0 = COPY %1(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            no_uzp1
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; See isUZPMask: Mask[1] != 2 * i + 0
+
+    ; CHECK-LABEL: name: no_uzp1
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
+    ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4, 6)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            no_uzp2
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; See isUZPMask: Mask[1] != 2 * i + 1
+
+    ; CHECK-LABEL: name: no_uzp2
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
+    ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 4, 5, 7)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            uzp1_undef
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; Make sure that we can still produce a uzp1/uzp2 with undef indices.
+
+    ; CHECK-LABEL: name: uzp1_undef
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, -1, 4, 6)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            uzp2_undef
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; Make sure that we can still produce a uzp1/uzp2 with undef indices.
+
+    ; CHECK-LABEL: name: uzp2_undef
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
+    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
+    $q0 = COPY %1(<4 x s32>)
+    RET_ReallyLR implicit $q0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uzp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uzp.mir
new file mode 100644
index 000000000000..1d5affea6c10
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uzp.mir
@@ -0,0 +1,53 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can select G_UZP1 and G_UZP2 via the tablegen importer.
+#
+# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            uzp1_v4s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: uzp1_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[UZP1v4i32_:%[0-9]+]]:fpr128 = UZP1v4i32 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP1v4i32_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = COPY $q1
+    %2:fpr(<4 x s32>) = G_UZP1 %0, %1
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            uzp2_v4s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+  liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: uzp2_v4s32
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK: [[UZP2v4i32_:%[0-9]+]]:fpr128 = UZP2v4i32 [[COPY]], [[COPY1]]
+    ; CHECK: $q0 = COPY [[UZP2v4i32_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+  %0:fpr(<4 x s32>) = COPY $q0
+  %1:fpr(<4 x s32>) = COPY $q1
+  %2:fpr(<4 x s32>) = G_UZP2 %0, %1
+  $q0 = COPY %2(<4 x s32>)
+  RET_ReallyLR implicit $q0
+
+...


        


More information about the llvm-commits mailing list