[llvm] [AArch64] Lower for power of 2 signed divides with scalar type (PR #97879)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 6 03:39:14 PDT 2024
https://github.com/vfdff updated https://github.com/llvm/llvm-project/pull/97879
>From e6650da0561180a1d40384422e526f5910a4b568 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Sat, 6 Jul 2024 02:10:50 -0400
Subject: [PATCH 1/2] [AArch64][SVE] Lower for power of 2 signed divides with
scalar type
The same assembly is expected for code that doesn't use SVE registers,
whether or not it is compiled with -msve-vector-bits=256.
Fix https://github.com/llvm/llvm-project/issues/97821
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++--
.../AArch64/sve-fixed-length-sdiv-pow2.ll | 17 +++++++++++++++++
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f6f66e9e0c70f..f072c9a9b21781 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17544,13 +17544,14 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
- return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue(N, 0); // Lower SDIV as SDIV
EVT VT = N->getValueType(0);
// For scalable and fixed types, mark them as cheap so we can handle it much
// later. This allows us to handle larger than legal types.
- if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
+ if (VT.isScalableVector() ||
+ (!VT.isScalarInteger() && Subtarget->useSVEForFixedLengthVectors()))
return SDValue(N, 0);
// fold (sdiv X, pow2)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 21a5abdeaa4d53..46c5ec375721c8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -383,4 +383,21 @@ define void @sdiv_v32i64(ptr %a) vscale_range(16,0) #0 {
ret void
}
+define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
+; CHECK-LABEL: sdiv_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: add w9, w8, #1
+; CHECK-NEXT: add w10, w8, #2
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csinc w8, w10, w8, lt
+; CHECK-NEXT: sub w0, w0, w8, asr #1
+; CHECK-NEXT: ret
+ %sub = add i32 %begin, 1
+ %add = sub i32 %sub, %first
+ %div.neg = sdiv i32 %add, -2
+ %sub1 = add i32 %div.neg, %begin
+ ret i32 %sub1
+}
+
attributes #0 = { "target-features"="+sve" }
>From d0038f96107db011bda86d1ab9bbb08b6b0e0f74 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Sat, 6 Jul 2024 04:54:11 -0400
Subject: [PATCH 2/2] [AArch64] Improve the codegen for sdiv 2
If X's size is BitWidth, then X sdiv 2 can be expressed as
X += X >> (BitWidth - 1) (logical shift: adds the sign bit, 0 or 1)
X >> 1 (arithmetic shift)
Fix https://github.com/llvm/llvm-project/issues/97884
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 33 +++++++++++++------
llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll | 3 +-
llvm/test/CodeGen/AArch64/sdivpow2.ll | 5 ++-
.../AArch64/sve-fixed-length-sdiv-pow2.ll | 6 ++--
4 files changed, 28 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ad957aaa8f141d..0a43da86574c01 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6188,18 +6188,31 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue Zero = DAG.getConstant(0, DL, VT);
- APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
- SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+ SDValue CMov;
- // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
- EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
- SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+ if (Lg2 == 1) {
+ // If the divisor is 2, add the sign bit (N0 >> (BitWidth - 1), logical shift) to N0 before shifting right.
+ unsigned BitWidth = VT.getSizeInBits();
+ SDValue SignVal = DAG.getNode(ISD::SRL, DL, VT, N0,
+ DAG.getConstant(BitWidth - 1, DL, VT));
+ CMov = DAG.getNode(ISD::ADD, DL, VT, N0, SignVal);
+
+ Created.push_back(SignVal.getNode());
+ Created.push_back(CMov.getNode());
+ } else {
+ APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+ SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
- Created.push_back(Cmp.getNode());
- Created.push_back(Add.getNode());
- Created.push_back(CMov.getNode());
+ // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CMov.getNode());
+ }
// Divide by pow2.
SDValue SRA =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 3a17a95ed71da2..6431cfc58a54d2 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -202,9 +202,8 @@ define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #32
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: add w8, w0, w0, lsr #31
; CHECK-SD-NEXT: mov w9, wzr
-; CHECK-SD-NEXT: cinc w8, w0, lt
; CHECK-SD-NEXT: asr w8, w8, #1
; CHECK-SD-NEXT: .LBB11_1: // %do.body
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AArch64/sdivpow2.ll b/llvm/test/CodeGen/AArch64/sdivpow2.ll
index d5ac724a67727f..061a0ed4f75599 100644
--- a/llvm/test/CodeGen/AArch64/sdivpow2.ll
+++ b/llvm/test/CodeGen/AArch64/sdivpow2.ll
@@ -77,7 +77,7 @@ define i64 @test6(i64 %x) {
define i64 @test7(i64 %x) {
; CHECK-LABEL: test7:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #281474976710655
+; CHECK-NEXT: mov x8, #281474976710655 // =0xffffffffffff
; CHECK-NEXT: cmp x0, #0
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: csel x8, x8, x0, lt
@@ -90,8 +90,7 @@ define i64 @test7(i64 %x) {
define i64 @test8(i64 %x) {
; ISEL-LABEL: test8:
; ISEL: // %bb.0:
-; ISEL-NEXT: cmp x0, #0
-; ISEL-NEXT: cinc x8, x0, lt
+; ISEL-NEXT: add x8, x0, x0, lsr #63
; ISEL-NEXT: asr x0, x8, #1
; ISEL-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 46c5ec375721c8..1c69321830ed55 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -387,10 +387,8 @@ define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
; CHECK-LABEL: sdiv_int:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
-; CHECK-NEXT: add w9, w8, #1
-; CHECK-NEXT: add w10, w8, #2
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: csinc w8, w10, w8, lt
+; CHECK-NEXT: add w8, w8, #1
+; CHECK-NEXT: add w8, w8, w8, lsr #31
; CHECK-NEXT: sub w0, w0, w8, asr #1
; CHECK-NEXT: ret
%sub = add i32 %begin, 1
More information about the llvm-commits
mailing list