[llvm] 993b203 - [AArch64] Sink splat(s/zext(..)) to uses

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 13 07:47:51 PDT 2022


Author: David Green
Date: 2022-09-13T15:47:41+01:00
New Revision: 993b203b6a0ce62e1d16a31bc21b45540cad227c

URL: https://github.com/llvm/llvm-project/commit/993b203b6a0ce62e1d16a31bc21b45540cad227c
DIFF: https://github.com/llvm/llvm-project/commit/993b203b6a0ce62e1d16a31bc21b45540cad227c.diff

LOG: [AArch64] Sink splat(s/zext(..)) to uses

If the shuffle is a splat and its source operand is a zext/sext, sinking both
the shuffle and the s/zext to their use can help create indexed s/umull. This
is especially useful to prevent an i64 mul from being scalarized.

Differential Revision: https://reviews.llvm.org/D133355

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4a777789877a..f3c9d1cb19a9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13115,6 +13115,18 @@ bool AArch64TargetLowering::shouldSinkOperands(
         continue;
 
       ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
+
+      // If the Shuffle is a splat and the operand is a zext/sext, sinking the
+      // operand and the s/zext can help create indexed s/umull. This is
+      // especially useful to prevent i64 mul being scalarized.
+      if (Shuffle && isSplatShuffle(Shuffle) &&
+          match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) {
+        Ops.push_back(&Shuffle->getOperandUse(0));
+        Ops.push_back(&Op);
+        IsProfitable = true;
+        continue;
+      }
+
       if (!Shuffle || !Shuffle->isZeroEltSplat())
         continue;
 

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index a41f27efdee1..49edd6e11530 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -531,23 +531,14 @@ for.body:                                         ; preds = %for.body.preheader1
 define void @sink_v2z64_1(i32 *%p, i32 *%d, i64 %n, <2 x i32> %a) {
 ; CHECK-LABEL: sink_v2z64_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-NEXT:    mov x9, xzr
-; CHECK-NEXT:    dup v0.2d, v0.d[1]
-; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:  .LBB6_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr d1, [x0]
-; CHECK-NEXT:    fmov x10, d0
-; CHECK-NEXT:    add x9, x9, #8
+; CHECK-NEXT:    add x8, x8, #8
 ; CHECK-NEXT:    subs x2, x2, #8
-; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-NEXT:    fmov x11, d1
-; CHECK-NEXT:    mov x12, v1.d[1]
-; CHECK-NEXT:    mul x10, x11, x10
-; CHECK-NEXT:    mul x11, x12, x8
-; CHECK-NEXT:    fmov d1, x10
-; CHECK-NEXT:    mov v1.d[1], x11
+; CHECK-NEXT:    umull v1.2d, v1.2s, v0.s[1]
 ; CHECK-NEXT:    shrn v1.2s, v1.2d, #15
 ; CHECK-NEXT:    str d1, [x0], #32
 ; CHECK-NEXT:    b.ne .LBB6_1
@@ -581,34 +572,18 @@ exit:
 define void @sink_v4i64_1(i32 *%p, i32 *%d, i64 %n, <2 x i32> %a) {
 ; CHECK-LABEL: sink_v4i64_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-NEXT:    mov x9, xzr
-; CHECK-NEXT:    dup v0.2d, v0.d[1]
-; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:  .LBB7_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr q1, [x0]
-; CHECK-NEXT:    fmov x10, d0
-; CHECK-NEXT:    fmov x13, d0
-; CHECK-NEXT:    add x9, x9, #8
+; CHECK-NEXT:    add x8, x8, #8
 ; CHECK-NEXT:    subs x2, x2, #8
-; CHECK-NEXT:    sshll v2.2d, v1.2s, #0
-; CHECK-NEXT:    sshll2 v1.2d, v1.4s, #0
-; CHECK-NEXT:    fmov x11, d2
-; CHECK-NEXT:    mov x12, v2.d[1]
-; CHECK-NEXT:    fmov x14, d1
-; CHECK-NEXT:    mul x10, x11, x10
-; CHECK-NEXT:    mov x11, v1.d[1]
-; CHECK-NEXT:    mul x13, x14, x13
-; CHECK-NEXT:    mul x12, x12, x8
-; CHECK-NEXT:    fmov d1, x10
-; CHECK-NEXT:    mul x10, x11, x8
-; CHECK-NEXT:    fmov d2, x13
-; CHECK-NEXT:    mov v1.d[1], x12
-; CHECK-NEXT:    mov v2.d[1], x10
-; CHECK-NEXT:    shrn v1.2s, v1.2d, #15
-; CHECK-NEXT:    shrn2 v1.4s, v2.2d, #15
-; CHECK-NEXT:    str q1, [x0], #32
+; CHECK-NEXT:    smull v2.2d, v1.2s, v0.s[1]
+; CHECK-NEXT:    smull2 v1.2d, v1.4s, v0.s[1]
+; CHECK-NEXT:    shrn v2.2s, v2.2d, #15
+; CHECK-NEXT:    shrn2 v2.4s, v1.2d, #15
+; CHECK-NEXT:    str q2, [x0], #32
 ; CHECK-NEXT:    b.ne .LBB7_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -640,16 +615,14 @@ exit:
 define void @sink_v8z16_0(i32 *%p, i32 *%d, i64 %n, <16 x i8> %a) {
 ; CHECK-LABEL: sink_v8z16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:  .LBB8_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr d1, [x0]
 ; CHECK-NEXT:    add x8, x8, #8
 ; CHECK-NEXT:    subs x2, x2, #8
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    mul v1.8h, v1.8h, v0.8h
+; CHECK-NEXT:    umull v1.8h, v1.8b, v0.8b
 ; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
 ; CHECK-NEXT:    xtn v1.8b, v1.8h
 ; CHECK-NEXT:    str d1, [x0], #32
@@ -684,22 +657,20 @@ exit:
 define void @sink_v16s16_8(i32 *%p, i32 *%d, i64 %n, <16 x i8> %a) {
 ; CHECK-LABEL: sink_v16s16_8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    dup v1.8b, v0.b[10]
 ; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    dup v0.8h, v0.h[2]
+; CHECK-NEXT:    dup v0.16b, v0.b[10]
 ; CHECK-NEXT:  .LBB9_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr q1, [x0]
+; CHECK-NEXT:    ldr q2, [x0]
 ; CHECK-NEXT:    add x8, x8, #8
 ; CHECK-NEXT:    subs x2, x2, #8
-; CHECK-NEXT:    sshll2 v2.8h, v1.16b, #0
-; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-NEXT:    mul v2.8h, v2.8h, v0.8h
-; CHECK-NEXT:    mul v1.8h, v1.8h, v0.8h
+; CHECK-NEXT:    smull2 v3.8h, v2.16b, v0.16b
+; CHECK-NEXT:    smull v2.8h, v2.8b, v1.8b
+; CHECK-NEXT:    cmlt v3.8h, v3.8h, #0
 ; CHECK-NEXT:    cmlt v2.8h, v2.8h, #0
-; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
-; CHECK-NEXT:    uzp1 v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    str q1, [x0], #32
+; CHECK-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-NEXT:    str q2, [x0], #32
 ; CHECK-NEXT:    b.ne .LBB9_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret


        


More information about the llvm-commits mailing list