[llvm] [RISCV][SDAG] Prefer ShortForwardBranch to lower sdiv by pow2 (PR #67364)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 25 13:10:41 PDT 2023
llvmbot wrote:
<!--IGNORE-->
>
><!--LLVM PR SUMMARY COMMENT-->
>
>@llvm/pr-subscribers-backend-x86
>
><details>
><summary>Changes</summary>
>
>This patch lowers `sdiv x, +/-2**k` to `add + select + shift` when the short forward branch optimization is enabled. The resulting instruction sequence is faster than the one generated by the target-independent DAGCombiner. The algorithm is described in ***Hacker's Delight***.
>
>This patch also removes duplicate logic in the X86 and AArch64 backends. However, we cannot do the same for the PowerPC backend, since it generates a special instruction, `addze`.
>
>
>---
>Full diff: https://github.com/llvm/llvm-project/pull/67364.diff
>
>
>7 Files Affected:
>
>- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+4)
>- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+42)
>- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+1-27)
>- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+20)
>- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+3)
>- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1-26)
>- (added) llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll (+386)
>
>
>``````````diff
>diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
>index b2a0a8c15cf14de..b2ebec61c996df1 100644
>--- a/llvm/include/llvm/CodeGen/TargetLowering.h
>+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
>@@ -4915,6 +4915,10 @@ class TargetLowering : public TargetLoweringBase {
> SmallVectorImpl<SDNode *> &Created) const;
> SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
> SmallVectorImpl<SDNode *> &Created) const;
>+ // Build sdiv by power-of-2 with conditional move instructions
>+ SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
>+ SelectionDAG &DAG,
>+ SmallVectorImpl<SDNode *> &Created) const;
>
> /// Targets may override this function to provide custom SDIV lowering for
> /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
>diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
>index 6aaaa2f68638c7d..7b6f99365512a43 100644
>--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
>+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
>@@ -6009,6 +6009,48 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
> return SDValue();
> }
>
>+/// Build sdiv by power-of-2 with conditional move instructions
>+/// Ref: "Hacker's Delight" by Henry Warren 10-1
>+/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
>+/// bgez x, label
>+/// add x, x, 2**k-1
>+/// label:
>+/// sra res, x, k
>+/// neg res, res (when the divisor is negative)
>+SDValue TargetLowering::buildSDIVPow2WithCMov(
>+ SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
>+ SmallVectorImpl<SDNode *> &Created) const {
>+ unsigned Lg2 = Divisor.countr_zero();
>+ EVT VT = N->getValueType(0);
>+
>+ SDLoc DL(N);
>+ SDValue N0 = N->getOperand(0);
>+ SDValue Zero = DAG.getConstant(0, DL, VT);
>+ APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
>+ SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
>+
>+ // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
>+ SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
>+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
>+ SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
>+
>+ Created.push_back(Cmp.getNode());
>+ Created.push_back(Add.getNode());
>+ Created.push_back(CMov.getNode());
>+
>+ // Divide by pow2.
>+ SDValue SRA =
>+ DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
>+
>+ // If we're dividing by a positive value, we're done. Otherwise, we must
>+ // negate the result.
>+ if (Divisor.isNonNegative())
>+ return SRA;
>+
>+ Created.push_back(SRA.getNode());
>+ return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
>+}
>+
> /// Given an ISD::SDIV node expressing a divide by constant,
> /// return a DAG expression to select that will generate the same value by
> /// multiplying by a magic number.
>diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
>index 3de6bd1ec94a82a..fb4a5594a04d5f3 100644
>--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
>+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
>@@ -16102,33 +16102,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
> !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
> return SDValue();
>
>- SDLoc DL(N);
>- SDValue N0 = N->getOperand(0);
>- unsigned Lg2 = Divisor.countr_zero();
>- SDValue Zero = DAG.getConstant(0, DL, VT);
>- SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
>-
>- // Add (N0 < 0) ? Pow2 - 1 : 0;
>- SDValue CCVal;
>- SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
>- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
>- SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
>-
>- Created.push_back(Cmp.getNode());
>- Created.push_back(Add.getNode());
>- Created.push_back(CSel.getNode());
>-
>- // Divide by pow2.
>- SDValue SRA =
>- DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
>-
>- // If we're dividing by a positive value, we're done. Otherwise, we must
>- // negate the result.
>- if (Divisor.isNonNegative())
>- return SRA;
>-
>- Created.push_back(SRA.getNode());
>- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
>+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
> }
>
> SDValue
>diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
>index 686350de29883aa..b40885f8d597943 100644
>--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
>+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
>@@ -18803,6 +18803,26 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
> return isCtpopFast(VT) ? 0 : 1;
> }
>
>+SDValue
>+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
>+ SelectionDAG &DAG,
>+ SmallVectorImpl<SDNode *> &Created) const {
>+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
>+ if (isIntDivCheap(N->getValueType(0), Attr))
>+ return SDValue(N, 0); // Lower SDIV as SDIV
>+
>+ // Only perform this transform if short forward branch opt is supported.
>+ if (!Subtarget.hasShortForwardBranchOpt())
>+ return SDValue();
>+ EVT VT = N->getValueType(0);
>+ if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
>+ return SDValue();
>+ unsigned Lg2 = Divisor.countr_zero();
>+ // ensure 2**k-1 < 2048
>+ if (Lg2 >= 11)
>+ return SDValue();
>+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
>+}
> namespace llvm::RISCVVIntrinsicsTable {
>
> #define GET_RISCVVIntrinsicsTable_IMPL
>diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
>index 0b8e6994a876ac8..5f6f22487d8ac3a 100644
>--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
>+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
>@@ -954,6 +954,9 @@ class RISCVTargetLowering : public TargetLowering {
> /// For available scheduling models FDIV + two independent FMULs are much
> /// faster than two FDIVs.
> unsigned combineRepeatedFPDivisors() const override;
>+
>+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
>+ SmallVectorImpl<SDNode *> &Created) const override;
> };
>
> namespace RISCV {
>diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
>index 71998140325b1f7..040358a5d1eeb92 100644
>--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
>+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
>@@ -22624,32 +22624,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
> if (Lg2 == 1)
> return SDValue();
>
>- SDLoc DL(N);
>- SDValue N0 = N->getOperand(0);
>- SDValue Zero = DAG.getConstant(0, DL, VT);
>- APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
>- SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
>-
>- // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
>- SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
>- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
>- SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
>-
>- Created.push_back(Cmp.getNode());
>- Created.push_back(Add.getNode());
>- Created.push_back(CMov.getNode());
>-
>- // Divide by pow2.
>- SDValue SRA =
>- DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
>-
>- // If we're dividing by a positive value, we're done. Otherwise, we must
>- // negate the result.
>- if (Divisor.isNonNegative())
>- return SRA;
>-
>- Created.push_back(SRA.getNode());
>- return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
>+ return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
> }
>
> /// Result of 'and' is compared against zero. Change to a BT node if possible.
>diff --git a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
>new file mode 100644
>index 000000000000000..ba46fe800763368
>--- /dev/null
>+++ b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
>@@ -0,0 +1,386 @@
>+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
>+; RUN: llc -mtriple=riscv64 -mattr=+c -verify-machineinstrs < %s \
>+; RUN: | FileCheck -check-prefix=NOSFB %s
>+; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -verify-machineinstrs < %s \
>+; RUN: | FileCheck -check-prefixes=SFB %s
>+
>+define signext i32 @sdiv2_32(i32 signext %0) {
>+; NOSFB-LABEL: sdiv2_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srliw a1, a0, 31
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: sraiw a0, a0, 1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdiv2_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB0_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 1
>+; SFB-NEXT: .LBB0_2:
>+; SFB-NEXT: sraiw a0, a0, 1
>+; SFB-NEXT: ret
>+ %res = sdiv i32 %0, 2
>+ ret i32 %res
>+}
>+
>+define signext i32 @sdivneg2_32(i32 signext %0) {
>+; NOSFB-LABEL: sdivneg2_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srliw a1, a0, 31
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: sraiw a0, a0, 1
>+; NOSFB-NEXT: neg a0, a0
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdivneg2_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB1_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 1
>+; SFB-NEXT: .LBB1_2:
>+; SFB-NEXT: sraiw a0, a0, 1
>+; SFB-NEXT: neg a0, a0
>+; SFB-NEXT: ret
>+ %res = sdiv i32 %0, -2
>+ ret i32 %res
>+}
>+
>+define i64 @sdiv2_64(i64 %0) {
>+; NOSFB-LABEL: sdiv2_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srli a1, a0, 63
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: srai a0, a0, 1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdiv2_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB2_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 1
>+; SFB-NEXT: .LBB2_2:
>+; SFB-NEXT: srai a0, a0, 1
>+; SFB-NEXT: ret
>+ %res = sdiv i64 %0, 2
>+ ret i64 %res
>+}
>+
>+define i64 @sdivneg2_64(i64 %0) {
>+; NOSFB-LABEL: sdivneg2_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srli a1, a0, 63
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: srai a0, a0, 1
>+; NOSFB-NEXT: neg a0, a0
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdivneg2_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB3_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 1
>+; SFB-NEXT: .LBB3_2:
>+; SFB-NEXT: srai a0, a0, 1
>+; SFB-NEXT: neg a0, a0
>+; SFB-NEXT: ret
>+ %res = sdiv i64 %0, -2
>+ ret i64 %res
>+}
>+
>+define signext i32 @srem2_32(i32 signext %0) {
>+; NOSFB-LABEL: srem2_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srliw a1, a0, 31
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -2
>+; NOSFB-NEXT: subw a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: srem2_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB4_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 1
>+; SFB-NEXT: .LBB4_2:
>+; SFB-NEXT: andi a1, a1, -2
>+; SFB-NEXT: subw a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i32 %0, 2
>+ ret i32 %res
>+}
>+
>+define signext i32 @sremneg2_32(i32 signext %0) {
>+; NOSFB-LABEL: sremneg2_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srliw a1, a0, 31
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -2
>+; NOSFB-NEXT: subw a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sremneg2_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB5_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 1
>+; SFB-NEXT: .LBB5_2:
>+; SFB-NEXT: andi a1, a1, -2
>+; SFB-NEXT: subw a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i32 %0, -2
>+ ret i32 %res
>+}
>+
>+define i64 @srem2_64(i64 %0) {
>+; NOSFB-LABEL: srem2_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srli a1, a0, 63
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -2
>+; NOSFB-NEXT: sub a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: srem2_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB6_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 1
>+; SFB-NEXT: .LBB6_2:
>+; SFB-NEXT: andi a1, a1, -2
>+; SFB-NEXT: sub a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i64 %0, 2
>+ ret i64 %res
>+}
>+
>+define i64 @sremneg2_64(i64 %0) {
>+; NOSFB-LABEL: sremneg2_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srli a1, a0, 63
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -2
>+; NOSFB-NEXT: sub a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sremneg2_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB7_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 1
>+; SFB-NEXT: .LBB7_2:
>+; SFB-NEXT: andi a1, a1, -2
>+; SFB-NEXT: sub a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i64 %0, -2
>+ ret i64 %res
>+}
>+
>+define signext i32 @sdiv8_32(i32 signext %0) {
>+; NOSFB-LABEL: sdiv8_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: slli a1, a0, 1
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: sraiw a0, a0, 3
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdiv8_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB8_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 7
>+; SFB-NEXT: .LBB8_2:
>+; SFB-NEXT: sraiw a0, a0, 3
>+; SFB-NEXT: ret
>+ %res = sdiv i32 %0, 8
>+ ret i32 %res
>+}
>+
>+define signext i32 @sdivneg8_32(i32 signext %0) {
>+; NOSFB-LABEL: sdivneg8_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: slli a1, a0, 1
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: sraiw a0, a0, 3
>+; NOSFB-NEXT: neg a0, a0
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdivneg8_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB9_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 7
>+; SFB-NEXT: .LBB9_2:
>+; SFB-NEXT: sraiw a0, a0, 3
>+; SFB-NEXT: neg a0, a0
>+; SFB-NEXT: ret
>+ %res = sdiv i32 %0, -8
>+ ret i32 %res
>+}
>+
>+define i64 @sdiv8_64(i64 %0) {
>+; NOSFB-LABEL: sdiv8_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srai a1, a0, 63
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: srai a0, a0, 3
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdiv8_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB10_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 7
>+; SFB-NEXT: .LBB10_2:
>+; SFB-NEXT: srai a0, a0, 3
>+; SFB-NEXT: ret
>+ %res = sdiv i64 %0, 8
>+ ret i64 %res
>+}
>+
>+define i64 @sdivneg8_64(i64 %0) {
>+; NOSFB-LABEL: sdivneg8_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srai a1, a0, 63
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: srai a0, a0, 3
>+; NOSFB-NEXT: neg a0, a0
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdivneg8_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: bgez a0, .LBB11_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a0, a0, 7
>+; SFB-NEXT: .LBB11_2:
>+; SFB-NEXT: srai a0, a0, 3
>+; SFB-NEXT: neg a0, a0
>+; SFB-NEXT: ret
>+ %res = sdiv i64 %0, -8
>+ ret i64 %res
>+}
>+
>+define signext i32 @srem8_32(i32 signext %0) {
>+; NOSFB-LABEL: srem8_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: slli a1, a0, 1
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -8
>+; NOSFB-NEXT: subw a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: srem8_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB12_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 7
>+; SFB-NEXT: .LBB12_2:
>+; SFB-NEXT: andi a1, a1, -8
>+; SFB-NEXT: subw a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i32 %0, 8
>+ ret i32 %res
>+}
>+
>+define signext i32 @sremneg8_32(i32 signext %0) {
>+; NOSFB-LABEL: sremneg8_32:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: slli a1, a0, 1
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -8
>+; NOSFB-NEXT: subw a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sremneg8_32:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB13_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 7
>+; SFB-NEXT: .LBB13_2:
>+; SFB-NEXT: andi a1, a1, -8
>+; SFB-NEXT: subw a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i32 %0, -8
>+ ret i32 %res
>+}
>+
>+define i64 @srem8_64(i64 %0) {
>+; NOSFB-LABEL: srem8_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srai a1, a0, 63
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -8
>+; NOSFB-NEXT: sub a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: srem8_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB14_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 7
>+; SFB-NEXT: .LBB14_2:
>+; SFB-NEXT: andi a1, a1, -8
>+; SFB-NEXT: sub a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i64 %0, 8
>+ ret i64 %res
>+}
>+
>+define i64 @sremneg8_64(i64 %0) {
>+; NOSFB-LABEL: sremneg8_64:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srai a1, a0, 63
>+; NOSFB-NEXT: srli a1, a1, 61
>+; NOSFB-NEXT: add a1, a1, a0
>+; NOSFB-NEXT: andi a1, a1, -8
>+; NOSFB-NEXT: sub a0, a0, a1
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sremneg8_64:
>+; SFB: # %bb.0:
>+; SFB-NEXT: mv a1, a0
>+; SFB-NEXT: bgez a0, .LBB15_2
>+; SFB-NEXT: # %bb.1:
>+; SFB-NEXT: addi a1, a0, 7
>+; SFB-NEXT: .LBB15_2:
>+; SFB-NEXT: andi a1, a1, -8
>+; SFB-NEXT: sub a0, a0, a1
>+; SFB-NEXT: ret
>+ %res = srem i64 %0, -8
>+ ret i64 %res
>+}
>+
>+; Negative tests
>+define i64 @sdiv4096(i64 %0) {
>+; NOSFB-LABEL: sdiv4096:
>+; NOSFB: # %bb.0:
>+; NOSFB-NEXT: srai a1, a0, 63
>+; NOSFB-NEXT: srli a1, a1, 52
>+; NOSFB-NEXT: add a0, a0, a1
>+; NOSFB-NEXT: srai a0, a0, 12
>+; NOSFB-NEXT: ret
>+;
>+; SFB-LABEL: sdiv4096:
>+; SFB: # %bb.0:
>+; SFB-NEXT: srai a1, a0, 63
>+; SFB-NEXT: srli a1, a1, 52
>+; SFB-NEXT: add a0, a0, a1
>+; SFB-NEXT: srai a0, a0, 12
>+; SFB-NEXT: ret
>+ %res = sdiv i64 %0, 4096
>+ ret i64 %res
>+}
>
>``````````
>
></details>
Error: Command failed due to missing milestone.
https://github.com/llvm/llvm-project/pull/67364
More information about the llvm-commits
mailing list