[llvm] [AArch64] Lower for power of 2 signed divides with scalar type (PR #97879)

Mon Jul 8 06:46:43 PDT 2024

https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/97879

>From 33d0a2ee7da308a7c6e92ac7d56777a95004ac30 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Sat, 6 Jul 2024 02:10:50 -0400
Subject: [PATCH] [AArch64][SVE] Lower for power of 2 signed divides with
 scalar type

Expect the same assembly for code that doesn't use SVE registers,
whether or not it is compiled with -msve-vector-bits=256.

Fixes https://github.com/llvm/llvm-project/issues/97821
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  5 +--
 llvm/test/CodeGen/AArch64/sdivpow2.ll         | 31 ++++++++++++++++++-
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f6f66e9e0c70f..a2cb34deddb08f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17544,13 +17544,14 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (isIntDivCheap(N->getValueType(0), Attr))
-    return SDValue(N,0); // Lower SDIV as SDIV
+    return SDValue(N, 0); // Lower SDIV as SDIV
 
   EVT VT = N->getValueType(0);
 
   // For scalable and fixed types, mark them as cheap so we can handle it much
   // later. This allows us to handle larger than legal types.
-  if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
+  if (VT.isScalableVector() ||
+      (VT.isFixedLengthVector() && Subtarget->useSVEForFixedLengthVectors()))
     return SDValue(N, 0);
 
   // fold (sdiv X, pow2)
diff --git a/llvm/test/CodeGen/AArch64/sdivpow2.ll b/llvm/test/CodeGen/AArch64/sdivpow2.ll
index d5ac724a67727f..4619534151814a 100644
--- a/llvm/test/CodeGen/AArch64/sdivpow2.ll
+++ b/llvm/test/CodeGen/AArch64/sdivpow2.ll
@@ -77,7 +77,7 @@ define i64 @test6(i64 %x) {
 define i64 @test7(i64 %x) {
 ; CHECK-LABEL: test7:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #281474976710655
+; CHECK-NEXT:    mov x8, #281474976710655 // =0xffffffffffff
 ; CHECK-NEXT:    cmp x0, #0
 ; CHECK-NEXT:    add x8, x0, x8
 ; CHECK-NEXT:    csel x8, x8, x0, lt
@@ -106,3 +106,32 @@ define i64 @test8(i64 %x) {
   ret i64 %div
 }
 
+define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
+; ISEL-LABEL: sdiv_int:
+; ISEL:       // %bb.0:
+; ISEL-NEXT:    sub w8, w0, w1
+; ISEL-NEXT:    add w9, w8, #1
+; ISEL-NEXT:    add w10, w8, #2
+; ISEL-NEXT:    cmp w9, #0
+; ISEL-NEXT:    csinc w8, w10, w8, lt
+; ISEL-NEXT:    sub w0, w0, w8, asr #1
+; ISEL-NEXT:    ret
+;
+; FAST-LABEL: sdiv_int:
+; FAST:       // %bb.0:
+; FAST-NEXT:    add w8, w0, #1
+; FAST-NEXT:    sub w8, w8, w1
+; FAST-NEXT:    add w9, w8, #1
+; FAST-NEXT:    cmp w8, #0
+; FAST-NEXT:    csel w8, w9, w8, lt
+; FAST-NEXT:    neg w8, w8, asr #1
+; FAST-NEXT:    add w0, w8, w0
+; FAST-NEXT:    ret
+  %sub = add i32 %begin, 1
+  %add = sub i32 %sub, %first
+  %div.neg = sdiv i32 %add, -2
+  %sub1 = add i32 %div.neg, %begin
+  ret i32 %sub1
+}
+
+attributes #0 = { "target-features"="+sve" vscale_range(2,2) }