[llvm] 4266815 - [AArch64] Convert negative constant aarch64_neon_sshl to VASHR (#68918)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 17 10:41:27 PDT 2023


Author: David Green
Date: 2023-10-17T18:41:23+01:00
New Revision: 4266815f4d82bd7571bf6ae85eb15fcc0b3ae37e

URL: https://github.com/llvm/llvm-project/commit/4266815f4d82bd7571bf6ae85eb15fcc0b3ae37e
DIFF: https://github.com/llvm/llvm-project/commit/4266815f4d82bd7571bf6ae85eb15fcc0b3ae37e.diff

LOG: [AArch64] Convert negative constant aarch64_neon_sshl to VASHR (#68918)

In replacing shifts by splat with constant shifts, we can handle
negative shifts by flipping the sign and using a VASHR or VLSHR.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/arm64-vshift.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 64d00dafd835b11..a16a102e472e709 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19100,9 +19100,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
   case Intrinsic::aarch64_neon_sshl:
   case Intrinsic::aarch64_neon_ushl:
     // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
-    // left shift for positive shift amounts. Below, we only replace the current
-    // node with VSHL, if this condition is met.
-    Opcode = AArch64ISD::VSHL;
+    // left shift for positive shift amounts. For negative shifts we can use a
+    // VASHR/VLSHR as appropriate.
+    if (ShiftAmount < 0) {
+      Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
+                                                   : AArch64ISD::VLSHR;
+      ShiftAmount = -ShiftAmount;
+    } else
+      Opcode = AArch64ISD::VSHL;
     IsRightShift = false;
     break;
   }

diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 367c3be242a17fa..1dfd977186b0e73 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2130,9 +2130,8 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind {
 ; CHECK-LABEL: neon.ushll4s_neg_constant_shift:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
 ; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ushl.4s v0, v0, v1
+; CHECK-NEXT:    ushr.4s v0, v0, #1
 ; CHECK-NEXT:    ret
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
@@ -2250,9 +2249,8 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind {
 define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind {
 ; CHECK-LABEL: neon.sshl16b_neg_constant_shift:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi.16b v1, #254
 ; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    sshl.16b v0, v0, v1
+; CHECK-NEXT:    sshr.16b v0, v0, #2
 ; CHECK-NEXT:    ret
   %tmp1 = load <16 x i8>, ptr %A
   %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
@@ -2300,9 +2298,8 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind {
 ; CHECK-LABEL: neon.sshll4s_neg_constant_shift:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
 ; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    sshl.4s v0, v0, v1
+; CHECK-NEXT:    sshr.4s v0, v0, #1
 ; CHECK-NEXT:    ret
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
@@ -2377,10 +2374,8 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind {
 ; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    fmov d1, x9
 ; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    sshl d0, d0, d1
+; CHECK-NEXT:    sshr d0, d0, #1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %tmp1 = load i32, ptr %A


        


More information about the llvm-commits mailing list