[llvm] cc95bb1 - [AArch64][GlobalISel] Implement selection of <2 x float> vector splat.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 14:22:00 PST 2020
Author: Amara Emerson
Date: 2020-01-09T14:05:35-08:00
New Revision: cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3
URL: https://github.com/llvm/llvm-project/commit/cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3
DIFF: https://github.com/llvm/llvm-project/commit/cc95bb1f57c674c0efdfc134eab8ed8c50f2a6e3.diff
LOG: [AArch64][GlobalISel] Implement selection of <2 x float> vector splat.
Also requires making G_IMPLICIT_DEF of v2s32 legal.
Differential Revision: https://reviews.llvm.org/D72422
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
llvm/test/CodeGen/AArch64/arm64-rev.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index ad59a95de288..450756464448 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -3703,15 +3703,44 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
// We're done, now find out what kind of splat we need.
LLT VecTy = MRI.getType(I.getOperand(0).getReg());
LLT EltTy = VecTy.getElementType();
- if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
- LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
+ if (EltTy.getSizeInBits() < 32) {
+ LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
return false;
}
bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
- static const unsigned OpcTable[2][2] = {
- {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
- {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
- unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+ unsigned Opc = 0;
+ if (IsFP) {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32lane;
+ } else {
+ Opc = AArch64::DUPv4i32lane;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64lane;
+ break;
+ }
+ } else {
+ switch (EltTy.getSizeInBits()) {
+ case 32:
+ if (VecTy.getNumElements() == 2) {
+ Opc = AArch64::DUPv2i32gpr;
+ } else {
+ Opc = AArch64::DUPv4i32gpr;
+ assert(VecTy.getNumElements() == 4);
+ }
+ break;
+ case 64:
+ assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
+ Opc = AArch64::DUPv2i64gpr;
+ break;
+ }
+ }
+ assert(Opc && "Did not compute an opcode for a dup");
// For FP splats, we need to widen the scalar reg via undef too.
if (IsFP) {
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 8d7c2bef6ea3..95719a35c6da 100644
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -59,7 +59,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
}
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
+ .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
.clampScalar(0, s1, s64)
.widenScalarToNextPow2(0, 8)
.fewerElementsIf(
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
index b3e35f6605e0..f8a4347a0344 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
@@ -37,11 +37,11 @@ body: |
# FIXME: s2 not correctly handled
---
-name: test_implicit_def_v2s32
+name: test_implicit_def_v4s32
body: |
bb.0:
- ; CHECK-LABEL: name: test_implicit_def_v2s32
+ ; CHECK-LABEL: name: test_implicit_def_v4s32
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
; CHECK: $x0 = COPY [[UV]](<2 x s32>)
@@ -67,3 +67,18 @@ body: |
$q0 = COPY %1
$q1 = COPY %2
...
+---
+name: test_implicit_def_v2s32
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: test_implicit_def_v2s32
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+ ; CHECK: $w0 = COPY [[UV]](s32)
+ ; CHECK: $w1 = COPY [[UV1]](s32)
+ %0:_(<2 x s32>) = G_IMPLICIT_DEF
+ %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
+ $w0 = COPY %1
+ $w1 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
index 4c0f13adc56f..77aa0c5528ed 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
@@ -49,6 +49,31 @@ body: |
$q0 = COPY %4(<2 x s64>)
RET_ReallyLR implicit $q0
+...
+---
+name: splat_2xi32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: splat_2xi32
+ ; CHECK: liveins: $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: [[DUPv2i32gpr:%[0-9]+]]:fpr64 = DUPv2i32gpr [[COPY]]
+ ; CHECK: $d0 = COPY [[DUPv2i32gpr]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:gpr(s32) = COPY $w0
+ %2:fpr(<2 x s32>) = G_IMPLICIT_DEF
+ %3:gpr(s32) = G_CONSTANT i32 0
+ %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+ %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
+ $d0 = COPY %4(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
...
---
name: splat_4xf32
@@ -103,6 +128,33 @@ body: |
$q0 = COPY %4(<2 x s64>)
RET_ReallyLR implicit $q0
+...
+---
+name: splat_2xf32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $s0
+
+ ; CHECK-LABEL: name: splat_2xf32
+ ; CHECK: liveins: $s0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
+ ; CHECK: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+ ; CHECK: $d0 = COPY [[DUPv2i32lane]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(s32) = COPY $s0
+ %2:fpr(<2 x s32>) = G_IMPLICIT_DEF
+ %3:gpr(s32) = G_CONSTANT i32 0
+ %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+ %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
+ $d0 = COPY %4(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
...
---
name: splat_2xf64_copies
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 8b6f8c596f67..6b063687d8af 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -295,8 +295,7 @@ define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
; FALLBACK-NEXT: ldr d0, [x0]
; FALLBACK-NEXT: adrp x8, .LCPI13_0
; FALLBACK-NEXT: ldr d1, [x8, :lo12:.LCPI13_0]
-; FALLBACK-NEXT: mov.s v2[1], w8
-; FALLBACK-NEXT: mov.d v0[1], v2[0]
+; FALLBACK-NEXT: mov.d v0[1], v0[0]
; FALLBACK-NEXT: tbl.16b v0, { v0 }, v1
; FALLBACK-NEXT: // kill: def $d0 killed $d0 killed $q0
; FALLBACK-NEXT: ret
@@ -317,8 +316,7 @@ define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
; FALLBACK-NEXT: ldr d0, [x0]
; FALLBACK-NEXT: adrp x8, .LCPI14_0
; FALLBACK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
-; FALLBACK-NEXT: mov.s v2[1], w8
-; FALLBACK-NEXT: mov.d v0[1], v2[0]
+; FALLBACK-NEXT: mov.d v0[1], v0[0]
; FALLBACK-NEXT: tbl.16b v0, { v0 }, v1
; FALLBACK-NEXT: // kill: def $d0 killed $d0 killed $q0
; FALLBACK-NEXT: ret
More information about the llvm-commits mailing list