[llvm] [AArch64] Lower for power of 2 signed divides with scalar type (PR #97879)

Sat Jul 6 03:39:14 PDT 2024

https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/97879

>From e6650da0561180a1d40384422e526f5910a4b568 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Sat, 6 Jul 2024 02:10:50 -0400
Subject: [PATCH 1/2] [AArch64][SVE] Lower for power of 2 signed divides with
 scalar type

Expected same assemble for code which doesn't use sve registers
when we compile it with/without -msve-vector-bits=256.

Fix https://github.com/llvm/llvm-project/issues/97821
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |  5 +++--
 .../AArch64/sve-fixed-length-sdiv-pow2.ll       | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f6f66e9e0c70f..f072c9a9b21781 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17544,13 +17544,14 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (isIntDivCheap(N->getValueType(0), Attr))
-    return SDValue(N,0); // Lower SDIV as SDIV
+    return SDValue(N, 0); // Lower SDIV as SDIV
 
   EVT VT = N->getValueType(0);
 
   // For scalable and fixed types, mark them as cheap so we can handle it much
   // later. This allows us to handle larger than legal types.
-  if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
+  if (VT.isScalableVector() ||
+      (!VT.isScalarInteger() && Subtarget->useSVEForFixedLengthVectors()))
     return SDValue(N, 0);
 
   // fold (sdiv X, pow2)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d53..46c5ec375721c8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -383,4 +383,21 @@ define void @sdiv_v32i64(ptr %a) vscale_range(16,0) #0 {
   ret void
 }
 
+define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
+; CHECK-LABEL: sdiv_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    add w9, w8, #1
+; CHECK-NEXT:    add w10, w8, #2
+; CHECK-NEXT:    cmp w9, #0
+; CHECK-NEXT:    csinc w8, w10, w8, lt
+; CHECK-NEXT:    sub w0, w0, w8, asr #1
+; CHECK-NEXT:    ret
+  %sub = add i32 %begin, 1
+  %add = sub i32 %sub, %first
+  %div.neg = sdiv i32 %add, -2
+  %sub1 = add i32 %div.neg, %begin
+  ret i32 %sub1
+}
+
 attributes #0 = { "target-features"="+sve" }

>From d0038f96107db011bda86d1ab9bbb08b6b0e0f74 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Sat, 6 Jul 2024 04:54:11 -0400
Subject: [PATCH 2/2] [AArch64] Improve the codegen for sdiv 2

If X's size is BitWidth, then X sdiv 2 can be expressived as
 X += X >> (BitWidth - 1)
 X >> 1

Fix https://github.com/llvm/llvm-project/issues/97884
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 33 +++++++++++++------
 llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll  |  3 +-
 llvm/test/CodeGen/AArch64/sdivpow2.ll         |  5 ++-
 .../AArch64/sve-fixed-length-sdiv-pow2.ll     |  6 ++--
 4 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ad957aaa8f141d..0a43da86574c01 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6188,18 +6188,31 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
   SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
   SDValue Zero = DAG.getConstant(0, DL, VT);
-  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
-  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+  SDValue CMov;
 
-  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
-  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
-  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
-  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
-  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+  if (Lg2 == 1) {
+    // If Divisor is 2, add 1 << (BitWidth -1) to it before shifting right.
+    unsigned BitWidth = VT.getSizeInBits();
+    SDValue SignVal = DAG.getNode(ISD::SRL, DL, VT, N0,
+                                  DAG.getConstant(BitWidth - 1, DL, VT));
+    CMov = DAG.getNode(ISD::ADD, DL, VT, N0, SignVal);
+
+    Created.push_back(SignVal.getNode());
+    Created.push_back(CMov.getNode());
+  } else {
+    APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+    SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
 
-  Created.push_back(Cmp.getNode());
-  Created.push_back(Add.getNode());
-  Created.push_back(CMov.getNode());
+    // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+    EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+    SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+    CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+    Created.push_back(Cmp.getNode());
+    Created.push_back(Add.getNode());
+    Created.push_back(CMov.getNode());
+  }
 
   // Divide by pow2.
   SDValue SRA =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 3a17a95ed71da2..6431cfc58a54d2 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -202,9 +202,8 @@ define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    sub sp, sp, #32
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    add w8, w0, w0, lsr #31
 ; CHECK-SD-NEXT:    mov w9, wzr
-; CHECK-SD-NEXT:    cinc w8, w0, lt
 ; CHECK-SD-NEXT:    asr w8, w8, #1
 ; CHECK-SD-NEXT:  .LBB11_1: // %do.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AArch64/sdivpow2.ll b/llvm/test/CodeGen/AArch64/sdivpow2.ll
index d5ac724a67727f..061a0ed4f75599 100644
--- a/llvm/test/CodeGen/AArch64/sdivpow2.ll
+++ b/llvm/test/CodeGen/AArch64/sdivpow2.ll
@@ -77,7 +77,7 @@ define i64 @test6(i64 %x) {
 define i64 @test7(i64 %x) {
 ; CHECK-LABEL: test7:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #281474976710655
+; CHECK-NEXT:    mov x8, #281474976710655 // =0xffffffffffff
 ; CHECK-NEXT:    cmp x0, #0
 ; CHECK-NEXT:    add x8, x0, x8
 ; CHECK-NEXT:    csel x8, x8, x0, lt
@@ -90,8 +90,7 @@ define i64 @test7(i64 %x) {
 define i64 @test8(i64 %x) {
 ; ISEL-LABEL: test8:
 ; ISEL:       // %bb.0:
-; ISEL-NEXT:    cmp x0, #0
-; ISEL-NEXT:    cinc x8, x0, lt
+; ISEL-NEXT:    add x8, x0, x0, lsr #63
 ; ISEL-NEXT:    asr x0, x8, #1
 ; ISEL-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 46c5ec375721c8..1c69321830ed55 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -387,10 +387,8 @@ define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
 ; CHECK-LABEL: sdiv_int:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w9, w8, #1
-; CHECK-NEXT:    add w10, w8, #2
-; CHECK-NEXT:    cmp w9, #0
-; CHECK-NEXT:    csinc w8, w10, w8, lt
+; CHECK-NEXT:    add w8, w8, #1
+; CHECK-NEXT:    add w8, w8, w8, lsr #31
 ; CHECK-NEXT:    sub w0, w0, w8, asr #1
 ; CHECK-NEXT:    ret
   %sub = add i32 %begin, 1