[llvm] [AArch64] Improve the codegen for sdiv 2 (PR #98324)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 10 07:06:22 PDT 2024


https://github.com/vfdff created https://github.com/llvm/llvm-project/pull/98324

Following PR97879: if X is BitWidth bits wide, then X sdiv 2 can be expressed as
```
  X += X >> (BitWidth - 1)
  X = X >> 1
```

Fix https://github.com/llvm/llvm-project/issues/97884

>From 1143e6fe4caa5d21345db9c7d877567a91006483 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Sat, 6 Jul 2024 04:54:11 -0400
Subject: [PATCH] [AArch64] Improve the codegen for sdiv 2

If X is BitWidth bits wide, then X sdiv 2 can be expressed as
 X += X >> (BitWidth - 1)
 X >> 1

Fix https://github.com/llvm/llvm-project/issues/97884
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  33 ++-
 llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll  |   3 +-
 llvm/test/CodeGen/AArch64/sdivpow2.ll         |   9 +-
 llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll     | 190 +++++-------------
 4 files changed, 81 insertions(+), 154 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 458f962802b4c..2b2d70976eb02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6182,18 +6182,31 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
   SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
   SDValue Zero = DAG.getConstant(0, DL, VT);
-  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
-  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+  SDValue CMov;
 
-  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
-  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
-  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
-  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
-  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+  if (Lg2 == 1) {
+    // If the divisor is 2, add N0's sign bit (N0 srl BitWidth-1) to it first.
+    unsigned BitWidth = VT.getSizeInBits();
+    SDValue SignVal = DAG.getNode(ISD::SRL, DL, VT, N0,
+                                  DAG.getConstant(BitWidth - 1, DL, VT));
+    CMov = DAG.getNode(ISD::ADD, DL, VT, N0, SignVal);
+
+    Created.push_back(SignVal.getNode());
+    Created.push_back(CMov.getNode());
+  } else {
+    APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+    SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
 
-  Created.push_back(Cmp.getNode());
-  Created.push_back(Add.getNode());
-  Created.push_back(CMov.getNode());
+    // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+    EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+    SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+    CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+    Created.push_back(Cmp.getNode());
+    Created.push_back(Add.getNode());
+    Created.push_back(CMov.getNode());
+  }
 
   // Divide by pow2.
   SDValue SRA =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 3a17a95ed71da..6431cfc58a54d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -202,9 +202,8 @@ define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    sub sp, sp, #32
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    add w8, w0, w0, lsr #31
 ; CHECK-SD-NEXT:    mov w9, wzr
-; CHECK-SD-NEXT:    cinc w8, w0, lt
 ; CHECK-SD-NEXT:    asr w8, w8, #1
 ; CHECK-SD-NEXT:  .LBB11_1: // %do.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AArch64/sdivpow2.ll b/llvm/test/CodeGen/AArch64/sdivpow2.ll
index 4619534151814..2551be8555ce6 100644
--- a/llvm/test/CodeGen/AArch64/sdivpow2.ll
+++ b/llvm/test/CodeGen/AArch64/sdivpow2.ll
@@ -90,8 +90,7 @@ define i64 @test7(i64 %x) {
 define i64 @test8(i64 %x) {
 ; ISEL-LABEL: test8:
 ; ISEL:       // %bb.0:
-; ISEL-NEXT:    cmp x0, #0
-; ISEL-NEXT:    cinc x8, x0, lt
+; ISEL-NEXT:    add x8, x0, x0, lsr #63
 ; ISEL-NEXT:    asr x0, x8, #1
 ; ISEL-NEXT:    ret
 ;
@@ -110,10 +109,8 @@ define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
 ; ISEL-LABEL: sdiv_int:
 ; ISEL:       // %bb.0:
 ; ISEL-NEXT:    sub w8, w0, w1
-; ISEL-NEXT:    add w9, w8, #1
-; ISEL-NEXT:    add w10, w8, #2
-; ISEL-NEXT:    cmp w9, #0
-; ISEL-NEXT:    csinc w8, w10, w8, lt
+; ISEL-NEXT:    add w8, w8, #1
+; ISEL-NEXT:    add w8, w8, w8, lsr #31
 ; ISEL-NEXT:    sub w0, w0, w8, asr #1
 ; ISEL-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
index f7dda82885678..55742e12db6c0 100644
--- a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
+++ b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
@@ -5,177 +5,95 @@
 ; RUN:   | FileCheck -check-prefixes=CHECK,SFB %s
 
 define signext i32 @sdiv2_32(i32 signext %0) {
-; NOSFB-LABEL: sdiv2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    sraiw a0, a0, 1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdiv2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB0_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB0_2:
-; SFB-NEXT:    sraiw a0, a0, 1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdiv2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    sraiw a0, a0, 1
+; CHECK-NEXT:    ret
   %res = sdiv i32 %0, 2
   ret i32 %res
 }
 
 define signext i32 @sdivneg2_32(i32 signext %0) {
-; NOSFB-LABEL: sdivneg2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    sraiw a0, a0, 1
-; NOSFB-NEXT:    neg a0, a0
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdivneg2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB1_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB1_2:
-; SFB-NEXT:    sraiw a0, a0, 1
-; SFB-NEXT:    neg a0, a0
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdivneg2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    sraiw a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
   %res = sdiv i32 %0, -2
   ret i32 %res
 }
 
 define i64 @sdiv2_64(i64 %0) {
-; NOSFB-LABEL: sdiv2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    srai a0, a0, 1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdiv2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB2_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB2_2:
-; SFB-NEXT:    srai a0, a0, 1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdiv2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    srai a0, a0, 1
+; CHECK-NEXT:    ret
   %res = sdiv i64 %0, 2
   ret i64 %res
 }
 
 define i64 @sdivneg2_64(i64 %0) {
-; NOSFB-LABEL: sdivneg2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    srai a0, a0, 1
-; NOSFB-NEXT:    neg a0, a0
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdivneg2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB3_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB3_2:
-; SFB-NEXT:    srai a0, a0, 1
-; SFB-NEXT:    neg a0, a0
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdivneg2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    srai a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
   %res = sdiv i64 %0, -2
   ret i64 %res
 }
 
 define signext i32 @srem2_32(i32 signext %0) {
-; NOSFB-LABEL: srem2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    subw a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: srem2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB4_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB4_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    subw a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: srem2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    subw a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i32 %0, 2
   ret i32 %res
 }
 
 define signext i32 @sremneg2_32(i32 signext %0) {
-; NOSFB-LABEL: sremneg2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    subw a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sremneg2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB5_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB5_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    subw a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sremneg2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    subw a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i32 %0, -2
   ret i32 %res
 }
 
 define i64 @srem2_64(i64 %0) {
-; NOSFB-LABEL: srem2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    sub a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: srem2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB6_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB6_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    sub a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: srem2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i64 %0, 2
   ret i64 %res
 }
 
 define i64 @sremneg2_64(i64 %0) {
-; NOSFB-LABEL: sremneg2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    sub a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sremneg2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB7_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB7_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    sub a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sremneg2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i64 %0, -2
   ret i64 %res
 }



More information about the llvm-commits mailing list