[llvm] 2ec2b25 - [AArch64][GlobalISel] Select @llvm.aarch64.neon.ld2.*
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 23 17:16:08 PDT 2021
Author: Jessica Paquette
Date: 2021-08-23T17:15:53-07:00
New Revision: 2ec2b25fbaafa845495249de4f25bdcd1141c6ef
URL: https://github.com/llvm/llvm-project/commit/2ec2b25fbaafa845495249de4f25bdcd1141c6ef
DIFF: https://github.com/llvm/llvm-project/commit/2ec2b25fbaafa845495249de4f25bdcd1141c6ef.diff
LOG: [AArch64][GlobalISel] Select @llvm.aarch64.neon.ld2.*
This is pretty similar to the ST2 selection code in
`AArch64InstructionSelector::selectIntrinsicWithSideEffects`.
This is a GISel equivalent of the ld2 case in `AArch64DAGToDAGISel::Select`.
There's some weirdness there that appears here too (e.g. using ld1 for scalar
cases, which are 1-element vectors in SDAG).
It's a little gross that we have to create the copy and then select it right
after, but I think we'd need to refactor the existing copy selection code
quite a bit to do better.
This was falling back while building llvm-project with GISel for AArch64.
Differential Revision: https://reviews.llvm.org/D108590
Added:
llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 15bdc81330f62..59ff8659148fd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5057,6 +5057,11 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
if (!IntrinID)
return false;
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P0 = LLT::pointer(0, 64);
// Select the instruction.
switch (IntrinID) {
default:
@@ -5081,16 +5086,54 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_ld2: {
+ Register Dst1 = I.getOperand(0).getReg();
+ Register Dst2 = I.getOperand(1).getReg();
+ Register Ptr = I.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Dst1);
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD2Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD2Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD2Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD2Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD2Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD2Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD2Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for ld2!");
+ unsigned SubReg =
+ Ty.getSizeInBits() == 64 ? AArch64::dsub0 : AArch64::qsub0;
+ // This will be selected as a load into a wide register, which is broken
+ // into two vectors subregister copies.
+ auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
+ Load.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
+ Register SelectedLoadDst = Load->getOperand(0).getReg();
+ // Emit the subreg copies and immediately select them.
+ // FIXME: We should refactor our copy code into an emitCopy helper and
+ // clean up uses of this pattern elsewhere in the selector.
+ auto Vec1 = MIB.buildInstr(TargetOpcode::COPY, {Dst1}, {})
+ .addReg(SelectedLoadDst, 0, SubReg);
+ auto Vec2 = MIB.buildInstr(AArch64::COPY, {Dst2}, {})
+ .addReg(SelectedLoadDst, 0, SubReg + 1);
+ selectCopy(*Vec1, TII, MRI, TRI, RBI);
+ selectCopy(*Vec2, TII, MRI, TRI, RBI);
+ break;
+ }
case Intrinsic::aarch64_neon_st2: {
Register Src1 = I.getOperand(1).getReg();
Register Src2 = I.getOperand(2).getReg();
Register Ptr = I.getOperand(3).getReg();
LLT Ty = MRI.getType(Src1);
- const LLT S8 = LLT::scalar(8);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
- const LLT S64 = LLT::scalar(64);
- const LLT P0 = LLT::pointer(0, 64);
unsigned Opc;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::ST2Twov8b;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
new file mode 100644
index 0000000000000..940485ea0b427
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ld2.mir
@@ -0,0 +1,232 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name: LD2Twov8b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov8b
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov8b:%[0-9]+]]:dd = LD2Twov8b %ptr :: (load (<8 x s64>))
+ ; CHECK: %dst1:fpr64 = COPY [[LD2Twov8b]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD2Twov8b]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<8 x s8>), %dst2:fpr(<8 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<8 x s64>))
+ $d0 = COPY %dst1(<8 x s8>)
+ $d1 = COPY %dst2(<8 x s8>)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov16b
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov16b
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov16b:%[0-9]+]]:qq = LD2Twov16b %ptr :: (load (<16 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov16b]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov16b]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<16 x s8>), %dst2:fpr(<16 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<16 x s64>))
+ $q0 = COPY %dst1(<16 x s8>)
+ $q1 = COPY %dst2(<16 x s8>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov4h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov4h
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov4h:%[0-9]+]]:dd = LD2Twov4h %ptr :: (load (<4 x s64>))
+ ; CHECK: %dst1:fpr64 = COPY [[LD2Twov4h]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD2Twov4h]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<4 x s16>), %dst2:fpr(<4 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<4 x s64>))
+ $d0 = COPY %dst1(<4 x s16>)
+ $d1 = COPY %dst2(<4 x s16>)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov8h
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov8h
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov8h:%[0-9]+]]:qq = LD2Twov8h %ptr :: (load (<8 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov8h]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov8h]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<8 x s16>), %dst2:fpr(<8 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<8 x s64>))
+ $q0 = COPY %dst1(<8 x s16>)
+ $q1 = COPY %dst2(<8 x s16>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov2s
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov2s:%[0-9]+]]:dd = LD2Twov2s %ptr :: (load (<2 x s64>))
+ ; CHECK: %dst1:fpr64 = COPY [[LD2Twov2s]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD2Twov2s]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<2 x s32>), %dst2:fpr(<2 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x s64>))
+ $d0 = COPY %dst1(<2 x s32>)
+ $d1 = COPY %dst2(<2 x s32>)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD2Twov4s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov4s
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov4s:%[0-9]+]]:qq = LD2Twov4s %ptr :: (load (<4 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov4s]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov4s]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<4 x s32>), %dst2:fpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<4 x s64>))
+ $q0 = COPY %dst1(<4 x s32>)
+ $q1 = COPY %dst2(<4 x s32>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov2d_s64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov2d:%[0-9]+]]:qq = LD2Twov2d %ptr :: (load (<2 x s64>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov2d]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov2d]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<2 x s64>), %dst2:fpr(<2 x s64>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x s64>))
+ $q0 = COPY %dst1(<2 x s64>)
+ $q1 = COPY %dst2(<2 x s64>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD2Twov2d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD2Twov2d_p0
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD2Twov2d:%[0-9]+]]:qq = LD2Twov2d %ptr :: (load (<2 x p0>))
+ ; CHECK: %dst1:fpr128 = COPY [[LD2Twov2d]].qsub0
+ ; CHECK: %dst2:fpr128 = COPY [[LD2Twov2d]].qsub1
+ ; CHECK: $q0 = COPY %dst1
+ ; CHECK: $q1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $q0, implicit $q1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(<2 x p0>), %dst2:fpr(<2 x p0>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (<2 x p0>))
+ $q0 = COPY %dst1(<2 x p0>)
+ $q1 = COPY %dst2(<2 x p0>)
+ RET_ReallyLR implicit $q0, implicit $q1
+...
+---
+name: LD1Twov1d_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD1Twov1d_s64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD1Twov1d:%[0-9]+]]:dd = LD1Twov1d %ptr :: (load (s64))
+ ; CHECK: %dst1:fpr64 = COPY [[LD1Twov1d]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD1Twov1d]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(s64), %dst2:fpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (s64))
+ $d0 = COPY %dst1(s64)
+ $d1 = COPY %dst2(s64)
+ RET_ReallyLR implicit $d0, implicit $d1
+...
+---
+name: LD1Twov1d_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: LD1Twov1d_p0
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %ptr:gpr64sp = COPY $x0
+ ; CHECK: [[LD1Twov1d:%[0-9]+]]:dd = LD1Twov1d %ptr :: (load (p0))
+ ; CHECK: %dst1:fpr64 = COPY [[LD1Twov1d]].dsub0
+ ; CHECK: %dst2:fpr64 = COPY [[LD1Twov1d]].dsub1
+ ; CHECK: $d0 = COPY %dst1
+ ; CHECK: $d1 = COPY %dst2
+ ; CHECK: RET_ReallyLR implicit $d0, implicit $d1
+ %ptr:gpr(p0) = COPY $x0
+ %dst1:fpr(p0), %dst2:fpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld2), %ptr(p0) :: (load (p0))
+ $d0 = COPY %dst1(p0)
+ $d1 = COPY %dst2(p0)
+ RET_ReallyLR implicit $d0, implicit $d1
More information about the llvm-commits
mailing list