[llvm] 993b203 - [AArch64] Sink splat(s/zext(..)) to uses
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 13 07:47:51 PDT 2022
Author: David Green
Date: 2022-09-13T15:47:41+01:00
New Revision: 993b203b6a0ce62e1d16a31bc21b45540cad227c
URL: https://github.com/llvm/llvm-project/commit/993b203b6a0ce62e1d16a31bc21b45540cad227c
DIFF: https://github.com/llvm/llvm-project/commit/993b203b6a0ce62e1d16a31bc21b45540cad227c.diff
LOG: [AArch64] Sink splat(s/zext(..)) to uses
If the shuffle is a splat and its operand is a zext/sext, sinking both the
shuffle and the s/zext to their use can help create indexed s/umull. This is
especially useful for preventing an i64 mul from being scalarized.
Differential Revision: https://reviews.llvm.org/D133355
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4a777789877a..f3c9d1cb19a9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13115,6 +13115,18 @@ bool AArch64TargetLowering::shouldSinkOperands(
continue;
ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
+
+ // If the Shuffle is a splat and the operand is a zext/sext, sinking the
+ // operand and the s/zext can help create indexed s/umull. This is
+ // especially useful to prevent i64 mul being scalarized.
+ if (Shuffle && isSplatShuffle(Shuffle) &&
+ match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) {
+ Ops.push_back(&Shuffle->getOperandUse(0));
+ Ops.push_back(&Op);
+ IsProfitable = true;
+ continue;
+ }
+
if (!Shuffle || !Shuffle->isZeroEltSplat())
continue;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index a41f27efdee1..49edd6e11530 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -531,23 +531,14 @@ for.body: ; preds = %for.body.preheader1
define void @sink_v2z64_1(i32 *%p, i32 *%d, i64 %n, <2 x i32> %a) {
; CHECK-LABEL: sink_v2z64_1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: dup v0.2d, v0.d[1]
-; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: .LBB6_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: fmov x10, d0
-; CHECK-NEXT: add x9, x9, #8
+; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: subs x2, x2, #8
-; CHECK-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-NEXT: fmov x11, d1
-; CHECK-NEXT: mov x12, v1.d[1]
-; CHECK-NEXT: mul x10, x11, x10
-; CHECK-NEXT: mul x11, x12, x8
-; CHECK-NEXT: fmov d1, x10
-; CHECK-NEXT: mov v1.d[1], x11
+; CHECK-NEXT: umull v1.2d, v1.2s, v0.s[1]
; CHECK-NEXT: shrn v1.2s, v1.2d, #15
; CHECK-NEXT: str d1, [x0], #32
; CHECK-NEXT: b.ne .LBB6_1
@@ -581,34 +572,18 @@ exit:
define void @sink_v4i64_1(i32 *%p, i32 *%d, i64 %n, <2 x i32> %a) {
; CHECK-LABEL: sink_v4i64_1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: dup v0.2d, v0.d[1]
-; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: .LBB7_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: fmov x10, d0
-; CHECK-NEXT: fmov x13, d0
-; CHECK-NEXT: add x9, x9, #8
+; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: subs x2, x2, #8
-; CHECK-NEXT: sshll v2.2d, v1.2s, #0
-; CHECK-NEXT: sshll2 v1.2d, v1.4s, #0
-; CHECK-NEXT: fmov x11, d2
-; CHECK-NEXT: mov x12, v2.d[1]
-; CHECK-NEXT: fmov x14, d1
-; CHECK-NEXT: mul x10, x11, x10
-; CHECK-NEXT: mov x11, v1.d[1]
-; CHECK-NEXT: mul x13, x14, x13
-; CHECK-NEXT: mul x12, x12, x8
-; CHECK-NEXT: fmov d1, x10
-; CHECK-NEXT: mul x10, x11, x8
-; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: mov v1.d[1], x12
-; CHECK-NEXT: mov v2.d[1], x10
-; CHECK-NEXT: shrn v1.2s, v1.2d, #15
-; CHECK-NEXT: shrn2 v1.4s, v2.2d, #15
-; CHECK-NEXT: str q1, [x0], #32
+; CHECK-NEXT: smull v2.2d, v1.2s, v0.s[1]
+; CHECK-NEXT: smull2 v1.2d, v1.4s, v0.s[1]
+; CHECK-NEXT: shrn v2.2s, v2.2d, #15
+; CHECK-NEXT: shrn2 v2.4s, v1.2d, #15
+; CHECK-NEXT: str q2, [x0], #32
; CHECK-NEXT: b.ne .LBB7_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
@@ -640,16 +615,14 @@ exit:
define void @sink_v8z16_0(i32 *%p, i32 *%d, i64 %n, <16 x i8> %a) {
; CHECK-LABEL: sink_v8z16_0:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: dup v0.8b, v0.b[0]
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: .LBB8_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: subs x2, x2, #8
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: mul v1.8h, v1.8h, v0.8h
+; CHECK-NEXT: umull v1.8h, v1.8b, v0.8b
; CHECK-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: str d1, [x0], #32
@@ -684,22 +657,20 @@ exit:
define void @sink_v16s16_8(i32 *%p, i32 *%d, i64 %n, <16 x i8> %a) {
; CHECK-LABEL: sink_v16s16_8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-NEXT: dup v1.8b, v0.b[10]
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: dup v0.8h, v0.h[2]
+; CHECK-NEXT: dup v0.16b, v0.b[10]
; CHECK-NEXT: .LBB9_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x0]
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: subs x2, x2, #8
-; CHECK-NEXT: sshll2 v2.8h, v1.16b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: mul v2.8h, v2.8h, v0.8h
-; CHECK-NEXT: mul v1.8h, v1.8h, v0.8h
+; CHECK-NEXT: smull2 v3.8h, v2.16b, v0.16b
+; CHECK-NEXT: smull v2.8h, v2.8b, v1.8b
+; CHECK-NEXT: cmlt v3.8h, v3.8h, #0
; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
-; CHECK-NEXT: cmlt v1.8h, v1.8h, #0
-; CHECK-NEXT: uzp1 v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: str q1, [x0], #32
+; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-NEXT: str q2, [x0], #32
; CHECK-NEXT: b.ne .LBB9_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
More information about the llvm-commits mailing list