[llvm] [RISCV][SDAG] Prefer ShortForwardBranch to lower sdiv by pow2 (PR #67364)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 09:37:03 PDT 2023
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/67364
>From 379efab6fef2a735770ae44d14c504a6c9332241 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 24 Oct 2023 22:54:55 +0800
Subject: [PATCH 1/3] [RISCV] Add pre-commit tests for PR67364. NFC.
---
llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll | 217 ++++++++++++++++++++++
1 file changed, 217 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
diff --git a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
new file mode 100644
index 000000000000000..1b9451171ffef0f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
@@ -0,0 +1,217 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+c,+m -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,NOSFB %s
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,SFB %s
+
+define signext i32 @sdiv2_32(i32 signext %0) {
+; CHECK-LABEL: sdiv2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sraiw a0, a0, 1
+; CHECK-NEXT: ret
+ %res = sdiv i32 %0, 2
+ ret i32 %res
+}
+
+define signext i32 @sdivneg2_32(i32 signext %0) {
+; CHECK-LABEL: sdivneg2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sraiw a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %res = sdiv i32 %0, -2
+ ret i32 %res
+}
+
+define i64 @sdiv2_64(i64 %0) {
+; CHECK-LABEL: sdiv2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: srai a0, a0, 1
+; CHECK-NEXT: ret
+ %res = sdiv i64 %0, 2
+ ret i64 %res
+}
+
+define i64 @sdivneg2_64(i64 %0) {
+; CHECK-LABEL: sdivneg2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: srai a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %res = sdiv i64 %0, -2
+ ret i64 %res
+}
+
+define signext i32 @srem2_32(i32 signext %0) {
+; CHECK-LABEL: srem2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i32 %0, 2
+ ret i32 %res
+}
+
+define signext i32 @sremneg2_32(i32 signext %0) {
+; CHECK-LABEL: sremneg2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i32 %0, -2
+ ret i32 %res
+}
+
+define i64 @srem2_64(i64 %0) {
+; CHECK-LABEL: srem2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i64 %0, 2
+ ret i64 %res
+}
+
+define i64 @sremneg2_64(i64 %0) {
+; CHECK-LABEL: sremneg2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i64 %0, -2
+ ret i64 %res
+}
+
+define signext i32 @sdiv8_32(i32 signext %0) {
+; CHECK-LABEL: sdiv8_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sraiw a0, a0, 3
+; CHECK-NEXT: ret
+ %res = sdiv i32 %0, 8
+ ret i32 %res
+}
+
+define signext i32 @sdivneg8_32(i32 signext %0) {
+; CHECK-LABEL: sdivneg8_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sraiw a0, a0, 3
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %res = sdiv i32 %0, -8
+ ret i32 %res
+}
+
+define i64 @sdiv8_64(i64 %0) {
+; CHECK-LABEL: sdiv8_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srai a1, a0, 63
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: srai a0, a0, 3
+; CHECK-NEXT: ret
+ %res = sdiv i64 %0, 8
+ ret i64 %res
+}
+
+define i64 @sdivneg8_64(i64 %0) {
+; CHECK-LABEL: sdivneg8_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srai a1, a0, 63
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: srai a0, a0, 3
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %res = sdiv i64 %0, -8
+ ret i64 %res
+}
+
+define signext i32 @srem8_32(i32 signext %0) {
+; CHECK-LABEL: srem8_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -8
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i32 %0, 8
+ ret i32 %res
+}
+
+define signext i32 @sremneg8_32(i32 signext %0) {
+; CHECK-LABEL: sremneg8_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -8
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i32 %0, -8
+ ret i32 %res
+}
+
+define i64 @srem8_64(i64 %0) {
+; CHECK-LABEL: srem8_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srai a1, a0, 63
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -8
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i64 %0, 8
+ ret i64 %res
+}
+
+define i64 @sremneg8_64(i64 %0) {
+; CHECK-LABEL: sremneg8_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srai a1, a0, 63
+; CHECK-NEXT: srli a1, a1, 61
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -8
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: ret
+ %res = srem i64 %0, -8
+ ret i64 %res
+}
+
+; Negative tests
+define i64 @sdiv4096(i64 %0) {
+; CHECK-LABEL: sdiv4096:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srai a1, a0, 63
+; CHECK-NEXT: srli a1, a1, 52
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: srai a0, a0, 12
+; CHECK-NEXT: ret
+ %res = sdiv i64 %0, 4096
+ ret i64 %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NOSFB: {{.*}}
+; SFB: {{.*}}
>From 5826025f2d50bec2091d3bc803c9d3c705f46ae7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Tue, 24 Oct 2023 23:34:12 +0800
Subject: [PATCH 2/3] [RISCV][SDAG] Prefer ShortForwardBranch to lower sdiv by pow2
---
llvm/include/llvm/CodeGen/TargetLowering.h | 4 +
.../CodeGen/SelectionDAG/TargetLowering.cpp | 43 ++
.../Target/AArch64/AArch64ISelLowering.cpp | 28 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 20 +
llvm/lib/Target/RISCV/RISCVISelLowering.h | 3 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 27 +-
llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll | 399 ++++++++++++------
7 files changed, 352 insertions(+), 172 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 1494f335e4936fe..1e5901f0fed0c9d 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4933,6 +4933,10 @@ class TargetLowering : public TargetLoweringBase {
SmallVectorImpl<SDNode *> &Created) const;
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const;
+ // Build sdiv by power-of-2 with conditional move instructions
+ SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const;
/// Targets may override this function to provide custom SDIV lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8b4f3159499122a..c125a48aded9a5e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6020,6 +6020,49 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}
+/// Build sdiv by power-of-2 with conditional move instructions
+/// Ref: "Hacker's Delight" by Henry S. Warren, Jr., Section 10-1.
+/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
+/// bgez x, label
+/// add x, x, 2**k-1
+/// label:
+/// sra res, x, k
+/// neg res, res (when the divisor is negative)
+SDValue TargetLowering::buildSDIVPow2WithCMov(
+ SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ unsigned Lg2 = Divisor.countr_zero();
+ EVT VT = N->getValueType(0);
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+ SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+
+ // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CMov.getNode());
+
+ // Divide by pow2.
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (Divisor.isNonNegative())
+ return SRA;
+
+ Created.push_back(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 565a9d55cce3ea1..3ac982b5adf0e1c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16279,33 +16279,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
- SDLoc DL(N);
- SDValue N0 = N->getOperand(0);
- unsigned Lg2 = Divisor.countr_zero();
- SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
-
- // Add (N0 < 0) ? Pow2 - 1 : 0;
- SDValue CCVal;
- SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
- SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
-
- Created.push_back(Cmp.getNode());
- Created.push_back(Add.getNode());
- Created.push_back(CSel.getNode());
-
- // Divide by pow2.
- SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
-
- // If we're dividing by a positive value, we're done. Otherwise, we must
- // negate the result.
- if (Divisor.isNonNegative())
- return SRA;
-
- Created.push_back(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
SDValue
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1f56ca17b785bc0..d4235531a856236 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19304,6 +19304,26 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
return isCtpopFast(VT) ? 0 : 1;
}
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SDIV as SDIV
+
+ // Only perform this transform if short forward branch opt is supported.
+ if (!Subtarget.hasShortForwardBranchOpt())
+ return SDValue();
+ EVT VT = N->getValueType(0);
+ if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
+ return SDValue();
+ unsigned Lg2 = Divisor.countr_zero();
+ // ensure 2**k-1 < 2048
+ if (Lg2 >= 11)
+ return SDValue();
+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
+}
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 2675b0ce43e439f..5688d4e9922b27a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -955,6 +955,9 @@ class RISCVTargetLowering : public TargetLowering {
/// For available scheduling models FDIV + two independent FMULs are much
/// faster than two FDIVs.
unsigned combineRepeatedFPDivisors() const override;
+
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
};
namespace RISCV {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 20e0210bcec5b6a..e6741e18e5c24b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22714,32 +22714,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (Lg2 == 1)
return SDValue();
- SDLoc DL(N);
- SDValue N0 = N->getOperand(0);
- SDValue Zero = DAG.getConstant(0, DL, VT);
- APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
- SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
-
- // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
- SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
- SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
-
- Created.push_back(Cmp.getNode());
- Created.push_back(Add.getNode());
- Created.push_back(CMov.getNode());
-
- // Divide by pow2.
- SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
-
- // If we're dividing by a positive value, we're done. Otherwise, we must
- // negate the result.
- if (Divisor.isNonNegative())
- return SRA;
-
- Created.push_back(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
/// Result of 'and' is compared against zero. Change to a BT node if possible.
diff --git a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
index 1b9451171ffef0f..f7dda8288567876 100644
--- a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
+++ b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
@@ -5,197 +5,361 @@
; RUN: | FileCheck -check-prefixes=CHECK,SFB %s
define signext i32 @sdiv2_32(i32 signext %0) {
-; CHECK-LABEL: sdiv2_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srliw a1, a0, 31
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: sraiw a0, a0, 1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdiv2_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srliw a1, a0, 31
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: sraiw a0, a0, 1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdiv2_32:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB0_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 1
+; SFB-NEXT: .LBB0_2:
+; SFB-NEXT: sraiw a0, a0, 1
+; SFB-NEXT: ret
%res = sdiv i32 %0, 2
ret i32 %res
}
define signext i32 @sdivneg2_32(i32 signext %0) {
-; CHECK-LABEL: sdivneg2_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srliw a1, a0, 31
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: sraiw a0, a0, 1
-; CHECK-NEXT: neg a0, a0
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdivneg2_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srliw a1, a0, 31
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: sraiw a0, a0, 1
+; NOSFB-NEXT: neg a0, a0
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdivneg2_32:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB1_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 1
+; SFB-NEXT: .LBB1_2:
+; SFB-NEXT: sraiw a0, a0, 1
+; SFB-NEXT: neg a0, a0
+; SFB-NEXT: ret
%res = sdiv i32 %0, -2
ret i32 %res
}
define i64 @sdiv2_64(i64 %0) {
-; CHECK-LABEL: sdiv2_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 63
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: srai a0, a0, 1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdiv2_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srli a1, a0, 63
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: srai a0, a0, 1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdiv2_64:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB2_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 1
+; SFB-NEXT: .LBB2_2:
+; SFB-NEXT: srai a0, a0, 1
+; SFB-NEXT: ret
%res = sdiv i64 %0, 2
ret i64 %res
}
define i64 @sdivneg2_64(i64 %0) {
-; CHECK-LABEL: sdivneg2_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 63
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: srai a0, a0, 1
-; CHECK-NEXT: neg a0, a0
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdivneg2_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srli a1, a0, 63
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: srai a0, a0, 1
+; NOSFB-NEXT: neg a0, a0
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdivneg2_64:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB3_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 1
+; SFB-NEXT: .LBB3_2:
+; SFB-NEXT: srai a0, a0, 1
+; SFB-NEXT: neg a0, a0
+; SFB-NEXT: ret
%res = sdiv i64 %0, -2
ret i64 %res
}
define signext i32 @srem2_32(i32 signext %0) {
-; CHECK-LABEL: srem2_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srliw a1, a0, 31
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -2
-; CHECK-NEXT: subw a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: srem2_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srliw a1, a0, 31
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -2
+; NOSFB-NEXT: subw a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: srem2_32:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB4_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 1
+; SFB-NEXT: .LBB4_2:
+; SFB-NEXT: andi a1, a1, -2
+; SFB-NEXT: subw a0, a0, a1
+; SFB-NEXT: ret
%res = srem i32 %0, 2
ret i32 %res
}
define signext i32 @sremneg2_32(i32 signext %0) {
-; CHECK-LABEL: sremneg2_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srliw a1, a0, 31
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -2
-; CHECK-NEXT: subw a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sremneg2_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srliw a1, a0, 31
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -2
+; NOSFB-NEXT: subw a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sremneg2_32:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB5_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 1
+; SFB-NEXT: .LBB5_2:
+; SFB-NEXT: andi a1, a1, -2
+; SFB-NEXT: subw a0, a0, a1
+; SFB-NEXT: ret
%res = srem i32 %0, -2
ret i32 %res
}
define i64 @srem2_64(i64 %0) {
-; CHECK-LABEL: srem2_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 63
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -2
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: srem2_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srli a1, a0, 63
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -2
+; NOSFB-NEXT: sub a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: srem2_64:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB6_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 1
+; SFB-NEXT: .LBB6_2:
+; SFB-NEXT: andi a1, a1, -2
+; SFB-NEXT: sub a0, a0, a1
+; SFB-NEXT: ret
%res = srem i64 %0, 2
ret i64 %res
}
define i64 @sremneg2_64(i64 %0) {
-; CHECK-LABEL: sremneg2_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a0, 63
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -2
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sremneg2_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srli a1, a0, 63
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -2
+; NOSFB-NEXT: sub a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sremneg2_64:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB7_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 1
+; SFB-NEXT: .LBB7_2:
+; SFB-NEXT: andi a1, a1, -2
+; SFB-NEXT: sub a0, a0, a1
+; SFB-NEXT: ret
%res = srem i64 %0, -2
ret i64 %res
}
define signext i32 @sdiv8_32(i32 signext %0) {
-; CHECK-LABEL: sdiv8_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: sraiw a0, a0, 3
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdiv8_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: slli a1, a0, 1
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: sraiw a0, a0, 3
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdiv8_32:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB8_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 7
+; SFB-NEXT: .LBB8_2:
+; SFB-NEXT: sraiw a0, a0, 3
+; SFB-NEXT: ret
%res = sdiv i32 %0, 8
ret i32 %res
}
define signext i32 @sdivneg8_32(i32 signext %0) {
-; CHECK-LABEL: sdivneg8_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: sraiw a0, a0, 3
-; CHECK-NEXT: neg a0, a0
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdivneg8_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: slli a1, a0, 1
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: sraiw a0, a0, 3
+; NOSFB-NEXT: neg a0, a0
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdivneg8_32:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB9_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 7
+; SFB-NEXT: .LBB9_2:
+; SFB-NEXT: sraiw a0, a0, 3
+; SFB-NEXT: neg a0, a0
+; SFB-NEXT: ret
%res = sdiv i32 %0, -8
ret i32 %res
}
define i64 @sdiv8_64(i64 %0) {
-; CHECK-LABEL: sdiv8_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srai a1, a0, 63
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: srai a0, a0, 3
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdiv8_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srai a1, a0, 63
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: srai a0, a0, 3
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdiv8_64:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB10_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 7
+; SFB-NEXT: .LBB10_2:
+; SFB-NEXT: srai a0, a0, 3
+; SFB-NEXT: ret
%res = sdiv i64 %0, 8
ret i64 %res
}
define i64 @sdivneg8_64(i64 %0) {
-; CHECK-LABEL: sdivneg8_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srai a1, a0, 63
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: srai a0, a0, 3
-; CHECK-NEXT: neg a0, a0
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sdivneg8_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srai a1, a0, 63
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a0, a0, a1
+; NOSFB-NEXT: srai a0, a0, 3
+; NOSFB-NEXT: neg a0, a0
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sdivneg8_64:
+; SFB: # %bb.0:
+; SFB-NEXT: bgez a0, .LBB11_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a0, a0, 7
+; SFB-NEXT: .LBB11_2:
+; SFB-NEXT: srai a0, a0, 3
+; SFB-NEXT: neg a0, a0
+; SFB-NEXT: ret
%res = sdiv i64 %0, -8
ret i64 %res
}
define signext i32 @srem8_32(i32 signext %0) {
-; CHECK-LABEL: srem8_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -8
-; CHECK-NEXT: subw a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: srem8_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: slli a1, a0, 1
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -8
+; NOSFB-NEXT: subw a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: srem8_32:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB12_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 7
+; SFB-NEXT: .LBB12_2:
+; SFB-NEXT: andi a1, a1, -8
+; SFB-NEXT: subw a0, a0, a1
+; SFB-NEXT: ret
%res = srem i32 %0, 8
ret i32 %res
}
define signext i32 @sremneg8_32(i32 signext %0) {
-; CHECK-LABEL: sremneg8_32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -8
-; CHECK-NEXT: subw a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sremneg8_32:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: slli a1, a0, 1
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -8
+; NOSFB-NEXT: subw a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sremneg8_32:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB13_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 7
+; SFB-NEXT: .LBB13_2:
+; SFB-NEXT: andi a1, a1, -8
+; SFB-NEXT: subw a0, a0, a1
+; SFB-NEXT: ret
%res = srem i32 %0, -8
ret i32 %res
}
define i64 @srem8_64(i64 %0) {
-; CHECK-LABEL: srem8_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srai a1, a0, 63
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -8
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: srem8_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srai a1, a0, 63
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -8
+; NOSFB-NEXT: sub a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: srem8_64:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB14_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 7
+; SFB-NEXT: .LBB14_2:
+; SFB-NEXT: andi a1, a1, -8
+; SFB-NEXT: sub a0, a0, a1
+; SFB-NEXT: ret
%res = srem i64 %0, 8
ret i64 %res
}
define i64 @sremneg8_64(i64 %0) {
-; CHECK-LABEL: sremneg8_64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srai a1, a0, 63
-; CHECK-NEXT: srli a1, a1, 61
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: andi a1, a1, -8
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: ret
+; NOSFB-LABEL: sremneg8_64:
+; NOSFB: # %bb.0:
+; NOSFB-NEXT: srai a1, a0, 63
+; NOSFB-NEXT: srli a1, a1, 61
+; NOSFB-NEXT: add a1, a1, a0
+; NOSFB-NEXT: andi a1, a1, -8
+; NOSFB-NEXT: sub a0, a0, a1
+; NOSFB-NEXT: ret
+;
+; SFB-LABEL: sremneg8_64:
+; SFB: # %bb.0:
+; SFB-NEXT: mv a1, a0
+; SFB-NEXT: bgez a0, .LBB15_2
+; SFB-NEXT: # %bb.1:
+; SFB-NEXT: addi a1, a0, 7
+; SFB-NEXT: .LBB15_2:
+; SFB-NEXT: andi a1, a1, -8
+; SFB-NEXT: sub a0, a0, a1
+; SFB-NEXT: ret
%res = srem i64 %0, -8
ret i64 %res
}
@@ -212,6 +376,3 @@ define i64 @sdiv4096(i64 %0) {
%res = sdiv i64 %0, 4096
ret i64 %res
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; NOSFB: {{.*}}
-; SFB: {{.*}}
>From a56b6a63689febfeb01c651fa46fb5a832a8c5d9 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 25 Oct 2023 00:36:09 +0800
Subject: [PATCH 3/3] fixup! [RISCV][SDAG] Prefer ShortForwardBranch to lower sdiv by pow2
Address comments.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +---
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 ++---
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d4235531a856236..54c1fd7cdac98fb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19318,9 +19318,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
EVT VT = N->getValueType(0);
if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
return SDValue();
- unsigned Lg2 = Divisor.countr_zero();
- // ensure 2**k-1 < 2048
- if (Lg2 >= 11)
+ if (Divisor.sgt(2048) || Divisor.slt(-2048))
return SDValue();
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e6741e18e5c24b6..c23687a2278054e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22708,10 +22708,9 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
!(Subtarget.is64Bit() && VT == MVT::i64))
return SDValue();
- unsigned Lg2 = Divisor.countr_zero();
-
// If the divisor is 2 or -2, the default expansion is better.
- if (Lg2 == 1)
+ if (Divisor == 2 ||
+ Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))
return SDValue();
return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
More information about the llvm-commits mailing list