[llvm] 2ec2b25 - [AArch64][GlobalISel] Select @llvm.aarch64.neon.ld2.*
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 23 17:16:08 PDT 2021
Author: Jessica Paquette
Date: 2021-08-23T17:15:53-07:00
New Revision: 2ec2b25fbaafa845495249de4f25bdcd1141c6ef
URL: https://github.com/llvm/llvm-project/commit/2ec2b25fbaafa845495249de4f25bdcd1141c6ef
DIFF: https://github.com/llvm/llvm-project/commit/2ec2b25fbaafa845495249de4f25bdcd1141c6ef.diff
LOG: [AArch64][GlobalISel] Select @llvm.aarch64.neon.ld2.*
This is pretty similar to the ST2 selection code in
`AArch64InstructionSelector::selectIntrinsicWithSideEffects`.
This is a GISel equivalent of the ld2 case in `AArch64DAGToDAGISel::Select`.
There's some weirdness there that appears here too (e.g. using ld1 for scalar
cases, which are 1-element vectors in SDAG).
It's a little gross that we have to create the copy and then select it right
after, but I think we'd need to refactor the existing copy selection code
quite a bit to do better.
This was falling back while building llvm-project with GISel for AArch64.
Differential Revision: https://reviews.llvm.org/D108590
Added:
llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 15bdc81330f62..59ff8659148fd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5057,6 +5057,11 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
if (!IntrinID)
return false;
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P0 = LLT::pointer(0, 64);
// Select the instruction.
switch (IntrinID) {
default:
@@ -5081,16 +5086,54 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_ld2: {
+ Register Dst1 = I.getOperand(0).getReg();
+ Register Dst2 = I.getOperand(1).getReg();
+ Register Ptr = I.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Dst1);
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD2Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD2Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD2Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD2Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD2Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD2Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD2Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for ld2!");
+ unsigned SubReg =
+ Ty.getSizeInBits() == 64 ? AArch64::dsub0 : AArch64::qsub0;
+ // This will be selected as a load into a wide register, which is broken
+ // into two vectors subregister copies.
+ auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
+ Load.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
+ Register SelectedLoadDst = Load->getOperand(0).getReg();
+ // Emit the subreg copies and immediately select them.
+ // FIXME: We should refactor our copy code into an emitCopy helper and
+ // clean up uses of this pattern elsewhere in the selector.
+ auto Vec1 = MIB.buildInstr(TargetOpcode::COPY, {Dst1}, {})
+ .addReg(SelectedLoadDst, 0, SubReg);
+ auto Vec2 = MIB.buildInstr(AArch64::COPY, {Dst2}, {})
+ .addReg(SelectedLoadDst, 0, SubReg + 1);
+ selectCopy(*Vec1, TII, MRI, TRI, RBI);
+ selectCopy(*Vec2, TII, MRI, TRI, RBI);
+ break;
+ }
case Intrinsic::aarch64_neon_st2: {
Register Src1 = I.getOperand(1).getReg();
Register Src2 = I.getOperand(2).getReg();
Register Ptr = I.getOperand(3).getReg();
LLT Ty = MRI.getType(Src1);
- const LLT S8 = LLT::scalar(8);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
- const LLT S64 = LLT::scalar(64);
- const LLT P0 = LLT::pointer(0, 64);
unsigned Opc;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::ST2Twov8b;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
new file mode 100644
index 0000000000000..940485ea0b427
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
@@ -0,0 +1,232 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name: LD2Twov8b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov8b
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov8b:%[0-9]+]]:dd = LD2Twov8b %ptr :: (load (<8 x s64>))
+ ; CHECK: %dst1:fpr64 = COPY [[LD2Twov8b]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD2Twov8b]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<8 x s8>), %dst2:fpr(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<8 x s64>))
+ $d0 = COPY %dst1(<8 x s8>)
+ $d1 = COPY %dst2(<8 x s8>)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov16b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov16b
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov16b:%[0-9]+]]:qq = LD2Twov16b %ptr :: (load (<16 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov16b]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov16b]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<16 x s8>), %dst2:fpr(<16 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<16 x s64>))
+ $q0 = COPY %dst1(<16 x s8>)
+ $q1 = COPY %dst2(<16 x s8>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov4h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov4h
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov4h:%[0-9]+]]:dd = LD2Twov4h %ptr :: (load (<4 x s64>))
+ ; CHECK: %dst1:fpr64 = COPY [[LD2Twov4h]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD2Twov4h]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<4 x s16>), %dst2:fpr(<4 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<4 x s64>))
+ $d0 = COPY %dst1(<4 x s16>)
+ $d1 = COPY %dst2(<4 x s16>)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov8h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov8h
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov8h:%[0-9]+]]:qq = LD2Twov8h %ptr :: (load (<8 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov8h]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov8h]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<8 x s16>), %dst2:fpr(<8 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<8 x s64>))
+ $q0 = COPY %dst1(<8 x s16>)
+ $q1 = COPY %dst2(<8 x s16>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov2s
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov2s:%[0-9]+]]:dd = LD2Twov2s %ptr :: (load (<2 x s64>))
+ ; CHECK: %dst1:fpr64 = COPY [[LD2Twov2s]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD2Twov2s]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<2 x s32>), %dst2:fpr(<2 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x s64>))
+ $d0 = COPY %dst1(<2 x s32>)
+ $d1 = COPY %dst2(<2 x s32>)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov4s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov4s
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov4s:%[0-9]+]]:qq = LD2Twov4s %ptr :: (load (<4 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov4s]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov4s]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<4 x s32>), %dst2:fpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<4 x s64>))
+ $q0 = COPY %dst1(<4 x s32>)
+ $q1 = COPY %dst2(<4 x s32>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov2d_s64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov2d:%[0-9]+]]:qq = LD2Twov2d %ptr :: (load (<2 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov2d]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov2d]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<2 x s64>), %dst2:fpr(<2 x s64>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x s64>))
+ $q0 = COPY %dst1(<2 x s64>)
+ $q1 = COPY %dst2(<2 x s64>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov2d_p0
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov2d:%[0-9]+]]:qq = LD2Twov2d %ptr :: (load (<2 x p0>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov2d]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov2d]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<2 x p0>), %dst2:fpr(<2 x p0>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x p0>))
+ $q0 = COPY %dst1(<2 x p0>)
+ $q1 = COPY %dst2(<2 x p0>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD1Twov1d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD1Twov1d_s64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD1Twov1d:%[0-9]+]]:dd = LD1Twov1d %ptr :: (load (s64))
+ ; CHECK: %dst1:fpr64 = COPY [[LD1Twov1d]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD1Twov1d]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(s64), %dst2:fpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (s64))
+ $d0 = COPY %dst1(s64)
+ $d1 = COPY %dst2(s64)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD1Twov1d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD1Twov1d_p0
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD1Twov1d:%[0-9]+]]:dd = LD1Twov1d %ptr :: (load (p0))
+ ; CHECK: %dst1:fpr64 = COPY [[LD1Twov1d]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD1Twov1d]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(p0), %dst2:fpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (p0))
+ $d0 = COPY %dst1(p0)
+ $d1 = COPY %dst2(p0)
+ RET_ReallyLR implicit $d0, implicit $d1
More information about the llvm-commits
mailing list