[llvm] 98d51c2 - [RISCV] Override TargetLowering::BuildSDIVPow2 to generate SELECT

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 10 23:55:01 PST 2022


Author: wangpc
Date: 2022-01-11T15:54:35+08:00
New Revision: 98d51c2542dc63608d3e98de415e090030e3ace1

URL: https://github.com/llvm/llvm-project/commit/98d51c2542dc63608d3e98de415e090030e3ace1
DIFF: https://github.com/llvm/llvm-project/commit/98d51c2542dc63608d3e98de415e090030e3ace1.diff

LOG: [RISCV] Override TargetLowering::BuildSDIVPow2 to generate SELECT

When `Zbt` is enabled, we can generate SELECT for division by power
of 2, so that there is no data dependency.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D114856

Added: 
    llvm/test/CodeGen/RISCV/div-pow2.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e2fc104d5309c..5ad708be75be8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10350,6 +10350,60 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
   return SDValue();
 }
 
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                   SelectionDAG &DAG,
+                                   SmallVectorImpl<SDNode *> &Created) const {
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(N->getValueType(0), Attr))
+    return SDValue(N, 0); // Cheap division: lower SDIV as SDIV
+
+  assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
+         "Unexpected divisor!");
+
+  // The SELECT built below needs a conditional move, so require Zbt.
+  if (!Subtarget.hasStdExtZbt())
+    return SDValue();
+
+  // Lg2 >= 12 (|Divisor| >= 2 ^ 12) isn't profitable to transform, and
+  // Lg2 == 1 (|Divisor| == 2) would generate more critical path instructions.
+  // Return an empty SDValue so the original DAGs are kept for these cases.
+  unsigned Lg2 = Divisor.countTrailingZeros();
+  if (Lg2 == 1 || Lg2 >= 12)
+    return SDValue();
+
+  // fold (sdiv X, pow2), for i32 and, on RV64 only, i64.
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+  // Sel = (N0 < 0) ? N0 + (Pow2 - 1) : N0
+  SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(Sel.getNode());
+
+  // Divide by pow2: arithmetic shift right of Sel by Lg2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
+
+  // If we're dividing by a positive value, we're done.  Otherwise, we must
+  // negate the result (0 - SRA), first recording the SRA node in Created.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+}
+
 #define GET_REGISTER_MATCHER
 #include "RISCVGenAsmMatcher.inc"
 

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 43ec26bd38412..23e5d89af081d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -528,6 +528,9 @@ class RISCVTargetLowering : public TargetLowering {
 
   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
 
+  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                        SmallVectorImpl<SDNode *> &Created) const override;
+
 private:
   /// RISCVCCAssignFn - This target-specific function extends the default
   /// CCValAssign with additional information used to lower RISC-V calling

diff  --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll
new file mode 100644
index 0000000000000..2b44983ae9877
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/div-pow2.ll
@@ -0,0 +1,809 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBT
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBT
+
+define i32 @sdiv32_pow2_2(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a1, a0, 31
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 1
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_2:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srli a1, a0, 31
+; RV32ZBT-NEXT:    add a0, a0, a1
+; RV32ZBT-NEXT:    srai a0, a0, 1
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srliw a1, a0, 31
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_2:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srliw a1, a0, 31
+; RV64ZBT-NEXT:    addw a0, a0, a1
+; RV64ZBT-NEXT:    sraiw a0, a0, 1
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, 2
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_2(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a1, a0, 31
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 1
+; RV32I-NEXT:    neg a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_2:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srli a1, a0, 31
+; RV32ZBT-NEXT:    add a0, a0, a1
+; RV32ZBT-NEXT:    srai a0, a0, 1
+; RV32ZBT-NEXT:    neg a0, a0
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srliw a1, a0, 31
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 1
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_2:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srliw a1, a0, 31
+; RV64ZBT-NEXT:    addw a0, a0, a1
+; RV64ZBT-NEXT:    sraiw a0, a0, 1
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, -2
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_2048(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_2048:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srli a1, a1, 21
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 11
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_2048:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    slti a1, a0, 0
+; RV32ZBT-NEXT:    addi a2, a0, 2047
+; RV32ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV32ZBT-NEXT:    srai a0, a0, 11
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_2048:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 21
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 11
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_2048:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    sext.w a1, a0
+; RV64ZBT-NEXT:    addi a2, a0, 2047
+; RV64ZBT-NEXT:    slti a1, a1, 0
+; RV64ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV64ZBT-NEXT:    sraiw a0, a0, 11
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, 2048
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_2048(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_2048:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srli a1, a1, 21
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 11
+; RV32I-NEXT:    neg a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_2048:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    slti a1, a0, 0
+; RV32ZBT-NEXT:    addi a2, a0, 2047
+; RV32ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV32ZBT-NEXT:    srai a0, a0, 11
+; RV32ZBT-NEXT:    neg a0, a0
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_2048:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 21
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 11
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_2048:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    sext.w a1, a0
+; RV64ZBT-NEXT:    addi a2, a0, 2047
+; RV64ZBT-NEXT:    slti a1, a1, 0
+; RV64ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV64ZBT-NEXT:    sraiw a0, a0, 11
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, -2048
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_4096(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_4096:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srli a1, a1, 20
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 12
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_4096:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a1, a0, 31
+; RV32ZBT-NEXT:    srli a1, a1, 20
+; RV32ZBT-NEXT:    add a0, a0, a1
+; RV32ZBT-NEXT:    srai a0, a0, 12
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_4096:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 20
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 12
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_4096:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    sraiw a1, a0, 31
+; RV64ZBT-NEXT:    srliw a1, a1, 20
+; RV64ZBT-NEXT:    addw a0, a0, a1
+; RV64ZBT-NEXT:    sraiw a0, a0, 12
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, 4096
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_4096(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_4096:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srli a1, a1, 20
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 12
+; RV32I-NEXT:    neg a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_4096:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a1, a0, 31
+; RV32ZBT-NEXT:    srli a1, a1, 20
+; RV32ZBT-NEXT:    add a0, a0, a1
+; RV32ZBT-NEXT:    srai a0, a0, 12
+; RV32ZBT-NEXT:    neg a0, a0
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_4096:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 20
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 12
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_4096:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    sraiw a1, a0, 31
+; RV64ZBT-NEXT:    srliw a1, a1, 20
+; RV64ZBT-NEXT:    addw a0, a0, a1
+; RV64ZBT-NEXT:    sraiw a0, a0, 12
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, -4096
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_65536(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_65536:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srli a1, a1, 16
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_65536:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a1, a0, 31
+; RV32ZBT-NEXT:    srli a1, a1, 16
+; RV32ZBT-NEXT:    add a0, a0, a1
+; RV32ZBT-NEXT:    srai a0, a0, 16
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_65536:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_65536:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    sraiw a1, a0, 31
+; RV64ZBT-NEXT:    srliw a1, a1, 16
+; RV64ZBT-NEXT:    addw a0, a0, a1
+; RV64ZBT-NEXT:    sraiw a0, a0, 16
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, 65536
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_65536(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_65536:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srli a1, a1, 16
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    neg a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_65536:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a1, a0, 31
+; RV32ZBT-NEXT:    srli a1, a1, 16
+; RV32ZBT-NEXT:    add a0, a0, a1
+; RV32ZBT-NEXT:    srai a0, a0, 16
+; RV32ZBT-NEXT:    neg a0, a0
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_65536:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 16
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_65536:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    sraiw a1, a0, 31
+; RV64ZBT-NEXT:    srliw a1, a1, 16
+; RV64ZBT-NEXT:    addw a0, a0, a1
+; RV64ZBT-NEXT:    sraiw a0, a0, 16
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, -65536
+  ret i32 %div
+}
+
+define i64 @sdiv64_pow2_2(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a2, a1, 31
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 1
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 31
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    srai a1, a1, 1
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_2:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srli a2, a1, 31
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a0, a2, a1, 1
+; RV32ZBT-NEXT:    srai a1, a1, 1
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srli a1, a0, 63
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 1
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_2:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srli a1, a0, 63
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 1
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, 2
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_2(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a2, a1, 31
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 1
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 31
+; RV32I-NEXT:    or a2, a3, a0
+; RV32I-NEXT:    neg a0, a2
+; RV32I-NEXT:    snez a2, a2
+; RV32I-NEXT:    srai a1, a1, 1
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_2:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srli a2, a1, 31
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a2, a2, a1, 1
+; RV32ZBT-NEXT:    neg a0, a2
+; RV32ZBT-NEXT:    snez a2, a2
+; RV32ZBT-NEXT:    srai a1, a1, 1
+; RV32ZBT-NEXT:    add a1, a1, a2
+; RV32ZBT-NEXT:    neg a1, a1
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_2:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srli a1, a0, 63
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 1
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_2:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srli a1, a0, 63
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 1
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, -2
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_2048(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_2048:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a2, a1, 31
+; RV32I-NEXT:    srli a2, a2, 21
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 11
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 21
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    srai a1, a1, 11
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_2048:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a2, a1, 31
+; RV32ZBT-NEXT:    srli a2, a2, 21
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a0, a2, a1, 11
+; RV32ZBT-NEXT:    srai a1, a1, 11
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_2048:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 53
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 11
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_2048:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    slti a1, a0, 0
+; RV64ZBT-NEXT:    addi a2, a0, 2047
+; RV64ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV64ZBT-NEXT:    srai a0, a0, 11
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, 2048
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_2048(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_2048:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a2, a1, 31
+; RV32I-NEXT:    srli a2, a2, 21
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 11
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 21
+; RV32I-NEXT:    or a2, a3, a0
+; RV32I-NEXT:    neg a0, a2
+; RV32I-NEXT:    snez a2, a2
+; RV32I-NEXT:    srai a1, a1, 11
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_2048:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a2, a1, 31
+; RV32ZBT-NEXT:    srli a2, a2, 21
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a2, a2, a1, 11
+; RV32ZBT-NEXT:    neg a0, a2
+; RV32ZBT-NEXT:    snez a2, a2
+; RV32ZBT-NEXT:    srai a1, a1, 11
+; RV32ZBT-NEXT:    add a1, a1, a2
+; RV32ZBT-NEXT:    neg a1, a1
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_2048:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 53
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 11
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_2048:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    slti a1, a0, 0
+; RV64ZBT-NEXT:    addi a2, a0, 2047
+; RV64ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV64ZBT-NEXT:    srai a0, a0, 11
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, -2048
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_4096(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_4096:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a2, a1, 31
+; RV32I-NEXT:    srli a2, a2, 20
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 12
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 20
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    srai a1, a1, 12
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_4096:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a2, a1, 31
+; RV32ZBT-NEXT:    srli a2, a2, 20
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a0, a2, a1, 12
+; RV32ZBT-NEXT:    srai a1, a1, 12
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_4096:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 52
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 12
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_4096:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srai a1, a0, 63
+; RV64ZBT-NEXT:    srli a1, a1, 52
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 12
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, 4096
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_4096(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_4096:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a2, a1, 31
+; RV32I-NEXT:    srli a2, a2, 20
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 12
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 20
+; RV32I-NEXT:    or a2, a3, a0
+; RV32I-NEXT:    neg a0, a2
+; RV32I-NEXT:    snez a2, a2
+; RV32I-NEXT:    srai a1, a1, 12
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_4096:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a2, a1, 31
+; RV32ZBT-NEXT:    srli a2, a2, 20
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a2, a2, a1, 12
+; RV32ZBT-NEXT:    neg a0, a2
+; RV32ZBT-NEXT:    snez a2, a2
+; RV32ZBT-NEXT:    srai a1, a1, 12
+; RV32ZBT-NEXT:    add a1, a1, a2
+; RV32ZBT-NEXT:    neg a1, a1
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_4096:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 52
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 12
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_4096:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srai a1, a0, 63
+; RV64ZBT-NEXT:    srli a1, a1, 52
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 12
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, -4096
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_65536(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_65536:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a2, a1, 31
+; RV32I-NEXT:    srli a2, a2, 16
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 16
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 16
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_65536:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a2, a1, 31
+; RV32ZBT-NEXT:    srli a2, a2, 16
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a0, a2, a1, 16
+; RV32ZBT-NEXT:    srai a1, a1, 16
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_65536:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 48
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_65536:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srai a1, a0, 63
+; RV64ZBT-NEXT:    srli a1, a1, 48
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 16
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, 65536
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_65536(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_65536:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a2, a1, 31
+; RV32I-NEXT:    srli a2, a2, 16
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    srli a3, a2, 16
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    slli a0, a1, 16
+; RV32I-NEXT:    or a2, a3, a0
+; RV32I-NEXT:    neg a0, a2
+; RV32I-NEXT:    snez a2, a2
+; RV32I-NEXT:    srai a1, a1, 16
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_65536:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srai a2, a1, 31
+; RV32ZBT-NEXT:    srli a2, a2, 16
+; RV32ZBT-NEXT:    add a2, a0, a2
+; RV32ZBT-NEXT:    sltu a0, a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a0
+; RV32ZBT-NEXT:    fsri a2, a2, a1, 16
+; RV32ZBT-NEXT:    neg a0, a2
+; RV32ZBT-NEXT:    snez a2, a2
+; RV32ZBT-NEXT:    srai a1, a1, 16
+; RV32ZBT-NEXT:    add a1, a1, a2
+; RV32ZBT-NEXT:    neg a1, a1
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_65536:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 48
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 16
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_65536:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srai a1, a0, 63
+; RV64ZBT-NEXT:    srli a1, a1, 48
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 16
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, -65536
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_8589934592(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_8589934592:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a2, a1, 31
+; RV32I-NEXT:    add a2, a1, a2
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    add a1, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    add a1, a2, a0
+; RV32I-NEXT:    srai a0, a1, 1
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_8589934592:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srli a2, a1, 31
+; RV32ZBT-NEXT:    add a2, a1, a2
+; RV32ZBT-NEXT:    srai a1, a1, 31
+; RV32ZBT-NEXT:    fsri a1, a1, a1, 31
+; RV32ZBT-NEXT:    add a1, a0, a1
+; RV32ZBT-NEXT:    sltu a0, a1, a0
+; RV32ZBT-NEXT:    add a1, a2, a0
+; RV32ZBT-NEXT:    srai a0, a1, 1
+; RV32ZBT-NEXT:    srai a1, a1, 31
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_8589934592:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 31
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 33
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_8589934592:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srai a1, a0, 63
+; RV64ZBT-NEXT:    srli a1, a1, 31
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 33
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, 8589934592 ; 2^33
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_8589934592(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_8589934592:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a2, a1, 31
+; RV32I-NEXT:    add a2, a1, a2
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    add a1, a0, a1
+; RV32I-NEXT:    sltu a0, a1, a0
+; RV32I-NEXT:    add a0, a2, a0
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srai a0, a0, 1
+; RV32I-NEXT:    snez a2, a0
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    neg a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_8589934592:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    srli a2, a1, 31
+; RV32ZBT-NEXT:    add a2, a1, a2
+; RV32ZBT-NEXT:    srai a1, a1, 31
+; RV32ZBT-NEXT:    fsri a1, a1, a1, 31
+; RV32ZBT-NEXT:    add a1, a0, a1
+; RV32ZBT-NEXT:    sltu a0, a1, a0
+; RV32ZBT-NEXT:    add a0, a2, a0
+; RV32ZBT-NEXT:    srai a1, a0, 31
+; RV32ZBT-NEXT:    srai a0, a0, 1
+; RV32ZBT-NEXT:    snez a2, a0
+; RV32ZBT-NEXT:    add a1, a1, a2
+; RV32ZBT-NEXT:    neg a1, a1
+; RV32ZBT-NEXT:    neg a0, a0
+; RV32ZBT-NEXT:    ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_8589934592:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 31
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 33
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_8589934592:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    srai a1, a0, 63
+; RV64ZBT-NEXT:    srli a1, a1, 31
+; RV64ZBT-NEXT:    add a0, a0, a1
+; RV64ZBT-NEXT:    srai a0, a0, 33
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, -8589934592 ; -2^33
+  ret i64 %div
+}


        


More information about the llvm-commits mailing list