[llvm] 8f54ebd - [AArch64][GlobalISel] Select llvm.aarch64.neon.st2 intrinsics

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 20 13:24:25 PDT 2021


Author: Jessica Paquette
Date: 2021-07-20T13:23:46-07:00
New Revision: 8f54ebd51d054807bdb75a1300667b64484a2504

URL: https://github.com/llvm/llvm-project/commit/8f54ebd51d054807bdb75a1300667b64484a2504
DIFF: https://github.com/llvm/llvm-project/commit/8f54ebd51d054807bdb75a1300667b64484a2504.diff

LOG: [AArch64][GlobalISel] Select llvm.aarch64.neon.st2 intrinsics

Add manual selection code similar to the code in AArch64ISelDAGToDAG, and add
`createTuple` helpers similar to the code there as well.

This accounted for around 111 fallbacks while building clang for AArch64 with
GlobalISel.

This also should make it easy to add selection code for other store
intrinsics.

As a minor cleanup, this uses `createQTuple` in the other place where we use
REG_SEQUENCE.

Differential Revision: https://reviews.llvm.org/D106332

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/select-st2.mir

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index ec4341f9c628..07fbf5698550 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -588,6 +588,58 @@ static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
   }
 }
 
+/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
+/// Helper function for functions like createDTuple and createQTuple.
+///
+/// \p RegClassIDs - The tuple register class IDs for a scalar class, ordered
+/// by tuple size starting at two registers. E.g. QQRegClassID, QQQRegClassID,
+/// QQQQRegClassID. Entry NumRegs - 2 is the class used for the new tuple.
+///
+/// \p SubRegs - The subregister indices used to place each register in the
+/// tuple. E.g., qsub0, qsub1, qsub2, qsub3. SubRegs[I] is the subregister
+/// index paired with Regs[I], so at least Regs.size() entries are
+/// required.
+///
+/// \returns Either the destination register of the REG_SEQUENCE instruction
+/// that was created, or the 0th element of \p Regs if \p Regs contains a
+/// single element.
+static Register createTuple(ArrayRef<Register> Regs,
+                            const unsigned RegClassIDs[],
+                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
+  unsigned NumRegs = Regs.size();
+  if (NumRegs == 1)
+    return Regs[0];
+  assert(NumRegs >= 2 && NumRegs <= 4 &&
+         "Only support between two and 4 registers in a tuple!");
+  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
+  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
+  auto RegSequence =
+      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
+  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
+    RegSequence.addUse(Regs[I]);
+    RegSequence.addImm(SubRegs[I]);
+  }
+  return RegSequence.getReg(0);
+}
+
+/// Build a DD, DDD or DDDD register tuple from the D-registers in \p Regs.
+static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+  static const unsigned RegClassIDs[] = {
+      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
+  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+                                     AArch64::dsub2, AArch64::dsub3};
+  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
+/// Build a QQ, QQQ or QQQQ register tuple from the Q-registers in \p Regs.
+static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
+  static const unsigned RegClassIDs[] = {
+      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
+  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+                                     AArch64::qsub2, AArch64::qsub3};
+  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
+}
+
 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
   auto &MI = *Root.getParent();
   auto &MBB = *MI.getParent();
@@ -4700,15 +4752,10 @@ bool AArch64InstructionSelector::selectShuffleVector(
 
   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
   // Q registers for regalloc.
-  auto RegSeq = MIB.buildInstr(TargetOpcode::REG_SEQUENCE,
-                               {&AArch64::QQRegClass}, {Src1Reg})
-                    .addImm(AArch64::qsub0)
-                    .addUse(Src2Reg)
-                    .addImm(AArch64::qsub1);
-
+  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
+  auto RegSeq = createQTuple(Regs, MIB);
   auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                              {RegSeq, IndexLoad->getOperand(0)});
-  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
   I.eraseFromParent();
   return true;
@@ -5007,6 +5054,43 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
     MIB.buildInstr(AArch64::BRK, {}, {})
         .addImm(I.getOperand(1).getImm() | ('U' << 8));
     break;
+  case Intrinsic::aarch64_neon_st2: {
+    Register Src1 = I.getOperand(1).getReg();
+    Register Src2 = I.getOperand(2).getReg();
+    Register Ptr = I.getOperand(3).getReg();
+    LLT Ty = MRI.getType(Src1);
+    const LLT S8 = LLT::scalar(8);
+    const LLT S16 = LLT::scalar(16);
+    const LLT S32 = LLT::scalar(32);
+    const LLT S64 = LLT::scalar(64);
+    const LLT P0 = LLT::pointer(0, 64);
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::ST2Twov8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST2Twov16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::ST2Twov4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST2Twov8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::ST2Twov2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST2Twov4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::ST2Twov2d;
+    else if (Ty == S64 || Ty == P0) // Scalar 64-bit sources use ST1.
+      Opc = AArch64::ST1Twov1d;
+    else
+      llvm_unreachable("Unexpected type for st2!");
+    SmallVector<Register, 2> Regs = {Src1, Src2};
+    Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
+                                               : createDTuple(Regs, MIB);
+    auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
+    Store.cloneMemRefs(I);
+    constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+    break;
+  }
   }
 
   I.eraseFromParent();

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-st2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-st2.mir
new file mode 100644
index 000000000000..2b1ec43e3005
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-st2.mir
@@ -0,0 +1,247 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            v8i8_ST2Twov8b
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $x0
+
+    ; CHECK-LABEL: name: v8i8_ST2Twov8b
+    ; CHECK: liveins: $d0, $d1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr64 = COPY $d0
+    ; CHECK: %src2:fpr64 = COPY $d1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST2Twov8b [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<8 x s8>) = COPY $d0
+    %src2:fpr(<8 x s8>) = COPY $d1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<8 x s8>), %src2(<8 x s8>), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v16i8_ST2Twov16b
+
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v16i8_ST2Twov16b
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov16b [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<16 x s8>) = COPY $q0
+    %src2:fpr(<16 x s8>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<16 x s8>), %src2(<16 x s8>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v4i16_ST2Twov4h
+
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $x0
+
+    ; CHECK-LABEL: name: v4i16_ST2Twov4h
+    ; CHECK: liveins: $d0, $d1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr64 = COPY $d0
+    ; CHECK: %src2:fpr64 = COPY $d1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST2Twov4h [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<4 x s16>) = COPY $d0
+    %src2:fpr(<4 x s16>) = COPY $d1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<4 x s16>), %src2(<4 x s16>), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v8i16_ST2Twov8h
+
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v8i16_ST2Twov8h
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov8h [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<8 x s16>) = COPY $q0
+    %src2:fpr(<8 x s16>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<8 x s16>), %src2(<8 x s16>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v2i32_ST2Twov2s
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $x0
+
+    ; CHECK-LABEL: name: v2i32_ST2Twov2s
+    ; CHECK: liveins: $d0, $d1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr64 = COPY $d0
+    ; CHECK: %src2:fpr64 = COPY $d1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST2Twov2s [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<2 x s32>) = COPY $d0
+    %src2:fpr(<2 x s32>) = COPY $d1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x s32>), %src2(<2 x s32>), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v4i32_ST2Twov4s
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v4i32_ST2Twov4s
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov4s [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<4 x s32>) = COPY $q0
+    %src2:fpr(<4 x s32>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<4 x s32>), %src2(<4 x s32>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v2i64_ST2Twov2d_s64_elts
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v2i64_ST2Twov2d_s64_elts
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov2d [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<2 x s64>) = COPY $q0
+    %src2:fpr(<2 x s64>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x s64>), %src2(<2 x s64>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v2i64_ST2Twov2d_s64_p0_elts
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $x0
+
+    ; CHECK-LABEL: name: v2i64_ST2Twov2d_s64_p0_elts
+    ; CHECK: liveins: $q0, $q1, $x0
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:fpr128 = COPY $q0
+    ; CHECK: %src2:fpr128 = COPY $q1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
+    ; CHECK: ST2Twov2d [[REG_SEQUENCE]], %ptr :: (store (<4 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:fpr(<2 x p0>) = COPY $q0
+    %src2:fpr(<2 x p0>) = COPY $q1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(<2 x p0>), %src2(<2 x p0>), %ptr(p0) :: (store (<4 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v1i64_ST1Twov1d_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: v1i64_ST1Twov1d_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:gpr64all = COPY $x0
+    ; CHECK: %src2:gpr64all = COPY $x1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST1Twov1d [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:gpr(s64) = COPY $x0
+    %src2:gpr(s64) = COPY $x1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(s64), %src2(s64), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...
+---
+name:            v1i64_ST1Twov1d_p0
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: v1i64_ST1Twov1d_p0
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %ptr:gpr64sp = COPY $x0
+    ; CHECK: %src1:gpr64all = COPY $x0
+    ; CHECK: %src2:gpr64all = COPY $x1
+    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:dd = REG_SEQUENCE %src1, %subreg.dsub0, %src2, %subreg.dsub1
+    ; CHECK: ST1Twov1d [[REG_SEQUENCE]], %ptr :: (store (<2 x s64>))
+    ; CHECK: RET_ReallyLR
+    %ptr:gpr(p0) = COPY $x0
+    %src1:gpr(p0) = COPY $x0
+    %src2:gpr(p0) = COPY $x1
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), %src1(p0), %src2(p0), %ptr(p0) :: (store (<2 x s64>))
+    RET_ReallyLR
+
+...


        


More information about the llvm-commits mailing list