[llvm] [AArch64] Convert negative constant aarch64_neon_sshl to VASHR (PR #68918)

Thu Oct 12 11:15:35 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

In replacing shifts by splat with constant shifts, we can handle negative shifts by flipping the sign and using a VASHR or VLSHR.

---
Full diff: https://github.com/llvm/llvm-project/pull/68918.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+8-3) 
- (modified) llvm/test/CodeGen/AArch64/arm64-vshift.ll (+4-9) 


``````````diff

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 64d00dafd835b11..a16a102e472e709 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19100,9 +19100,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
   case Intrinsic::aarch64_neon_sshl:
   case Intrinsic::aarch64_neon_ushl:
     // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
-    // left shift for positive shift amounts. Below, we only replace the current
-    // node with VSHL, if this condition is met.
-    Opcode = AArch64ISD::VSHL;
+    // left shift for positive shift amounts. For negative shifts we can use a
+    // VASHR/VLSHR as appropiate.
+    if (ShiftAmount < 0) {
+      Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
+                                                   : AArch64ISD::VLSHR;
+      ShiftAmount = -ShiftAmount;
+    } else
+      Opcode = AArch64ISD::VSHL;
     IsRightShift = false;
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 367c3be242a17fa..1dfd977186b0e73 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2130,9 +2130,8 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind {
 ; CHECK-LABEL: neon.ushll4s_neg_constant_shift:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
 ; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ushl.4s v0, v0, v1
+; CHECK-NEXT:    ushr.4s v0, v0, #1
 ; CHECK-NEXT:    ret
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
@@ -2250,9 +2249,8 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind {
 define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind {
 ; CHECK-LABEL: neon.sshl16b_neg_constant_shift:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi.16b v1, #254
 ; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    sshl.16b v0, v0, v1
+; CHECK-NEXT:    sshr.16b v0, v0, #2
 ; CHECK-NEXT:    ret
   %tmp1 = load <16 x i8>, ptr %A
   %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
@@ -2300,9 +2298,8 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind {
 ; CHECK-LABEL: neon.sshll4s_neg_constant_shift:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
 ; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    sshl.4s v0, v0, v1
+; CHECK-NEXT:    sshr.4s v0, v0, #1
 ; CHECK-NEXT:    ret
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
@@ -2377,10 +2374,8 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind {
 ; CHECK-LABEL: neon.sshll_scalar_constant_shift_m1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    fmov d1, x9
 ; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    sshl d0, d0, d1
+; CHECK-NEXT:    sshr d0, d0, #1
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %tmp1 = load i32, ptr %A

``````````

</details>


https://github.com/llvm/llvm-project/pull/68918