[llvm] d4b3380 - [ARM] Handle Splats in MVE lane interleaving
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 30 03:19:28 PDT 2021
Author: David Green
Date: 2021-03-30T11:19:16+01:00
New Revision: d4b3380dfe62be6e2f1994e821d4d777e84e4294
URL: https://github.com/llvm/llvm-project/commit/d4b3380dfe62be6e2f1994e821d4d777e84e4294
DIFF: https://github.com/llvm/llvm-project/commit/d4b3380dfe62be6e2f1994e821d4d777e84e4294.diff
LOG: [ARM] Handle Splats in MVE lane interleaving
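As another addition to MVE lane interleaving, this handles splat shuffle
vectors (shuffles for which isZeroEltSplat() holds) instead of rejecting them
as unhandled instructions, since the shuffle of a splat is still a splat.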
Differential Revision: https://reviews.llvm.org/D97291
Added:
Modified:
llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
index ce01245fbd14..9189298a9d04 100644
--- a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
+++ b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
@@ -212,6 +212,12 @@ static bool tryInterleave(Instruction *Start,
Worklist.push_back(cast<Instruction>(Use));
break;
+ case Instruction::ShuffleVector:
+ // A shuffle of a splat is a splat.
+ if (cast<ShuffleVectorInst>(I)->isZeroEltSplat())
+ continue;
+ LLVM_FALLTHROUGH;
+
default:
LLVM_DEBUG(dbgs() << " Unhandled instruction: " << *I << "\n");
return false;
diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
index 883a0781f313..db9c286a44be 100644
--- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
@@ -422,17 +422,15 @@ entry:
define arm_aapcs_vfpcc void @mul_i16(<8 x i16> *%A, <8 x i16> *%B, i32 %C, <8 x i16> *%D) {
; CHECK-LABEL: mul_i16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.s32 q0, [r1]
-; CHECK-NEXT: vldrh.s32 q1, [r0]
-; CHECK-NEXT: vldrh.s32 q2, [r0, #8]
+; CHECK-NEXT: vldrw.u32 q0, [r1]
+; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmul.i32 q0, q1, q0
-; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
+; CHECK-NEXT: vmullt.s16 q2, q1, q0
+; CHECK-NEXT: vmullb.s16 q0, q1, q0
+; CHECK-NEXT: vshl.s32 q2, r2
; CHECK-NEXT: vshl.s32 q0, r2
-; CHECK-NEXT: vmul.i32 q1, q2, q1
-; CHECK-NEXT: vstrh.32 q0, [r3]
-; CHECK-NEXT: vshl.s32 q1, r2
-; CHECK-NEXT: vstrh.32 q1, [r3, #8]
+; CHECK-NEXT: vmovnt.i32 q0, q2
+; CHECK-NEXT: vstrw.32 q0, [r3]
; CHECK-NEXT: bx lr
entry:
%a = load <8 x i16>, <8 x i16> *%A, align 4
@@ -451,17 +449,15 @@ entry:
define arm_aapcs_vfpcc void @mul_i8(<16 x i8> *%A, <16 x i8> *%B, i16 %C, <16 x i8> *%D) {
; CHECK-LABEL: mul_i8:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.s16 q0, [r1]
-; CHECK-NEXT: vldrb.s16 q1, [r0]
-; CHECK-NEXT: vldrb.s16 q2, [r0, #8]
+; CHECK-NEXT: vldrw.u32 q0, [r1]
+; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: rsbs r2, r2, #0
-; CHECK-NEXT: vmul.i16 q0, q1, q0
-; CHECK-NEXT: vldrb.s16 q1, [r1, #8]
+; CHECK-NEXT: vmullt.s8 q2, q1, q0
+; CHECK-NEXT: vmullb.s8 q0, q1, q0
+; CHECK-NEXT: vshl.s16 q2, r2
; CHECK-NEXT: vshl.s16 q0, r2
-; CHECK-NEXT: vmul.i16 q1, q2, q1
-; CHECK-NEXT: vstrb.16 q0, [r3]
-; CHECK-NEXT: vshl.s16 q1, r2
-; CHECK-NEXT: vstrb.16 q1, [r3, #8]
+; CHECK-NEXT: vmovnt.i16 q0, q2
+; CHECK-NEXT: vstrw.32 q0, [r3]
; CHECK-NEXT: bx lr
entry:
%a = load <16 x i8>, <16 x i8> *%A, align 4
More information about the llvm-commits
mailing list