[llvm] [BPF] Support signed division at cpu v1 with constant divisor (PR #110627)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 30 22:30:32 PDT 2024


https://github.com/yonghong-song updated https://github.com/llvm/llvm-project/pull/110627

>From a14b09df5150a92cc65a9c25c171d5ff1a4eb78c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song at linux.dev>
Date: Mon, 30 Sep 2024 15:11:31 -0700
Subject: [PATCH] [BPF] Support signed division at cpu v1 with constant divisor

The motivating example is shown below:
  $ cat t1.c
  struct S {
    int var[3];
  };
  int foo1 (struct S *a, struct S *b)
  {
      return a - b;
  }

For cpu v1/v2/v3, the compilation will fail with the following
errors:
  $ clang --target=bpf -O2 -c t1.c -mcpu=v3
  t1.c:4:5: error: unsupported signed division, please convert to unsigned div/mod.
      4 | int foo1 (struct S *a, struct S *b)
        |     ^
  1 error generated.

The reason is that sdiv/smod is supported at -mcpu=v4.
At cpu v1/v2/v3, only udiv/umod is supported.

But the above example (for func foo1()) is reasonably common, and
users have to work around the compilation failure by using udiv with
conditionals.

For x86, for the above t1.c, compile and dump the asm code like below:
  $ clang -O2 -c t1.c && llvm-objdump -d t1.o
  0000000000000000 <foo1>:
       0: 48 29 f7                      subq    %rsi, %rdi
       3: 48 c1 ef 02                   shrq    $0x2, %rdi
       7: 69 c7 ab aa aa aa             imull   $0xaaaaaaab, %edi, %eax # imm = 0xAAAAAAAB
       d: c3                            retq

Basically sdiv can be replaced with sub, shr and imul. Latest gcc-bpf
is also able to generate code similar to x86 with -mcpu=v1.
See https://godbolt.org/z/feP9ETbjj

So let us add clang support for sdiv (constant divisor) as well at
-mcpu=v1. But we still want to keep udiv untouched at -mcpu=v1.
One more parameter "bool IsSigned" is added to isIntDivCheap().
The "IsSigned" parameter is used only by BPF backend to ensure
udiv not impacted.

Note that only 32-bit sdiv (constant divisor) can be converted into
sub/shr/imul. 64-bit sdiv (constant divisor) cannot be converted
since bpf does not support 64-bit multiplication without potential
overflow.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  7 +-
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  4 +-
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +--
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 12 +--
 .../Target/AArch64/AArch64ISelLowering.cpp    |  7 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +-
 llvm/lib/Target/BPF/BPFISelLowering.cpp       |  5 ++
 llvm/lib/Target/BPF/BPFISelLowering.h         |  2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  7 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |  2 +-
 llvm/lib/Target/VE/VEISelLowering.h           |  2 +-
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  2 +-
 .../WebAssembly/WebAssemblyISelLowering.h     |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  5 +-
 llvm/lib/Target/X86/X86ISelLowering.h         |  3 +-
 llvm/test/CodeGen/BPF/sdiv_error.ll           |  6 +-
 llvm/test/CodeGen/BPF/sdiv_to_mul.ll          | 80 +++++++++++++++++++
 llvm/test/CodeGen/BPF/srem_error.ll           |  6 +-
 19 files changed, 133 insertions(+), 35 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/sdiv_to_mul.ll

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c36a346c1b2e05..fbe1fdca3b1a4f 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -565,7 +565,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       if (!isa<ConstantInt>(Inst.getOperand(1)))
         return false;
       EVT VT = getTLI()->getValueType(DL, Inst.getType());
-      return !getTLI()->isIntDivCheap(VT, Fn.getAttributes());
+
+      bool IsSigned = false;
+      if (Inst.getOpcode() == Instruction::SDiv ||
+          Inst.getOpcode() == Instruction::SRem)
+        IsSigned = true;
+      return !getTLI()->isIntDivCheap(VT, IsSigned, Fn.getAttributes());
     }
     };
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 3842af56e6b3d7..f61aeceef72977 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -549,7 +549,9 @@ class TargetLoweringBase {
   /// several shifts, adds, and multiplies for this target.
   /// The definition of "cheaper" may depend on whether we're optimizing
   /// for speed or for size.
-  virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
+  virtual bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const {
+    return false;
+  }
 
   /// Return true if the target can handle a standalone remainder operation.
   virtual bool hasStandaloneRem(EVT VT) const {
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c279289f9161bf..a21f8501412b32 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5376,7 +5376,7 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
   const auto &TLI = getTargetLowering();
   LLVMContext &Ctx = MF.getFunction().getContext();
   auto &DL = MF.getDataLayout();
-  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), false, Attr))
     return false;
 
   // Don't do this for minsize because the instruction sequence is usually
@@ -5426,7 +5426,7 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
   const auto &TLI = getTargetLowering();
   LLVMContext &Ctx = MF.getFunction().getContext();
   auto &DL = MF.getDataLayout();
-  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+  if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), true, Attr))
     return false;
 
   // Don't do this for minsize because the instruction sequence is usually
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 65a620b70d8f0c..76ca87916d3217 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4844,7 +4844,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
   // true.  Otherwise, we break the simplification logic in visitREM().
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), true, Attr))
     if (SDValue DivRem = useDivRem(N))
         return DivRem;
 
@@ -4929,7 +4929,7 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   // trade-offs.
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (isConstantOrConstantVector(N1) &&
-      !TLI.isIntDivCheap(N->getValueType(0), Attr))
+      !TLI.isIntDivCheap(N->getValueType(0), true, Attr))
     if (SDValue Op = BuildSDIV(N))
       return Op;
 
@@ -4984,7 +4984,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
   // true.  Otherwise, we break the simplification logic in visitREM().
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), false, Attr))
     if (SDValue DivRem = useDivRem(N))
         return DivRem;
 
@@ -5033,7 +5033,7 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
   // fold (udiv x, c) -> alternate
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (isConstantOrConstantVector(N1) &&
-      !TLI.isIntDivCheap(N->getValueType(0), Attr))
+      !TLI.isIntDivCheap(N->getValueType(0), false, Attr))
     if (SDValue Op = BuildUDIV(N))
       return Op;
 
@@ -5115,7 +5115,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
   // combine will not return a DIVREM.  Regardless, checking cheapness here
   // makes sense since the simplification results in fatter code.
-  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, isSigned, Attr)) {
     if (isSigned) {
       // check if we can build faster implementation for srem
       if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f19975557a0a77..c65a906abe062c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5377,11 +5377,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
     // When division is cheap or optimizing for minimum size,
     // fall through to DIVREM creation by skipping this fold.
-    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
-      if (N0.getOpcode() == ISD::UREM) {
+    bool IsSigned = N0.getOpcode() == ISD::SREM;
+    if (!isIntDivCheap(VT, IsSigned, Attr) &&
+        !Attr.hasFnAttr(Attribute::MinSize)) {
+      if (!IsSigned) {
         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
           return Folded;
-      } else if (N0.getOpcode() == ISD::SREM) {
+      } else {
         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
           return Folded;
       }
@@ -6233,7 +6235,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                               SelectionDAG &DAG,
                               SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isIntDivCheap(N->getValueType(0), Attr))
+  if (isIntDivCheap(N->getValueType(0), true, Attr))
     return SDValue(N, 0); // Lower SDIV as SDIV
   return SDValue();
 }
@@ -6243,7 +6245,7 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
                               SelectionDAG &DAG,
                               SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isIntDivCheap(N->getValueType(0), Attr))
+  if (isIntDivCheap(N->getValueType(0), true, Attr))
     return SDValue(N, 0); // Lower SREM as SREM
   return SDValue();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4166d9bd22bc01..27471f18c971b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18544,7 +18544,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isIntDivCheap(N->getValueType(0), Attr))
+  if (isIntDivCheap(N->getValueType(0), true, Attr))
     return SDValue(N, 0); // Lower SDIV as SDIV
 
   EVT VT = N->getValueType(0);
@@ -18574,7 +18574,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isIntDivCheap(N->getValueType(0), Attr))
+  if (isIntDivCheap(N->getValueType(0), true, Attr))
     return SDValue(N, 0); // Lower SREM as SREM
 
   EVT VT = N->getValueType(0);
@@ -27816,7 +27816,8 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
   }
 }
 
-bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
+bool AArch64TargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
+                                          AttributeList Attr) const {
   // Integer division on AArch64 is expensive. However, when aggressively
   // optimizing for code size, we prefer to use a div instruction, as it is
   // usually smaller than the alternative sequence.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 480bf60360bf55..c5e52122d298a5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -845,7 +845,7 @@ class AArch64TargetLowering : public TargetLowering {
     return AArch64::X1;
   }
 
-  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
+  bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const override;
 
   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                         const MachineFunction &MF) const override {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index ff23d3b055d0d5..5d38247844fc70 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -238,6 +238,11 @@ bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   return TargetLoweringBase::isZExtFree(Val, VT2);
 }
 
+bool BPFTargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
+                                      AttributeList Attr) const {
+  return (HasMovsx || !IsSigned) ? true : false;
+}
+
 BPFTargetLowering::ConstraintType
 BPFTargetLowering::getConstraintType(StringRef Constraint) const {
   if (Constraint.size() == 1) {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 42707949e864cd..ca79ee87b2c5f2 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -118,7 +118,7 @@ class BPFTargetLowering : public TargetLowering {
     return Op.size() >= 8 ? MVT::i64 : MVT::i32;
   }
 
-  bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
+  bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const override;
 
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e77f8783f17271..f908e855a31219 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12680,7 +12680,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     // to multiply by magic constant.
     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
     if (N->getOperand(1).getOpcode() == ISD::Constant &&
-        !isIntDivCheap(N->getValueType(0), Attr))
+        !isIntDivCheap(N->getValueType(0), N->getOpcode() == ISD::SDIV, Attr))
       return;
 
     // If the input is i32, use ANY_EXTEND since the W instructions don't read
@@ -21275,7 +21275,8 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
   return SDValue();
 }
 
-bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
+bool RISCVTargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
+                                        AttributeList Attr) const {
   // When aggressively optimizing for code size, we prefer to use a div
   // instruction, as it is usually smaller than the alternative sequence.
   // TODO: Add vector division?
@@ -21745,7 +21746,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                    SelectionDAG &DAG,
                                    SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isIntDivCheap(N->getValueType(0), Attr))
+  if (isIntDivCheap(N->getValueType(0), true, Attr))
     return SDValue(N, 0); // Lower SDIV as SDIV
 
   // Only perform this transform if short forward branch opt is supported.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 05581552ab6041..d6abf869570853 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -536,7 +536,7 @@ class RISCVTargetLowering : public TargetLowering {
   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                unsigned Index) const override;
 
-  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
+  bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const override;
 
   bool preferScalarizeSplat(SDNode *N) const override;
 
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 8b9412d786625d..9b0309d969873d 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -329,7 +329,7 @@ class VETargetLowering : public TargetLowering {
   unsigned getMinimumJumpTableEntries() const override;
 
   // SX-Aurora VE's s/udiv is 5-9 times slower than multiply.
-  bool isIntDivCheap(EVT, AttributeList) const override { return false; }
+  bool isIntDivCheap(EVT, bool, AttributeList) const override { return false; }
   // VE doesn't have rem.
   bool hasStandaloneRem(EVT) const override { return false; }
   // VE LDZ instruction returns 64 if the input is zero.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index fa78bf38f426cd..b3fecc9e5ca5d6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -821,7 +821,7 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
   return true;
 }
 
-bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
+bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
                                               AttributeList Attr) const {
   // The current thinking is that wasm engines will perform this optimization,
   // so we can save on code size.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 7d9cfb7739e435..cf573d0af83b0e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -73,7 +73,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
   bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, Align Alignment,
                                       MachineMemOperand::Flags Flags,
                                       unsigned *Fast) const override;
-  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
+  bool isIntDivCheap(EVT VT, bool IsSigned, AttributeList Attr) const override;
   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
   bool shouldSinkOperands(Instruction *I,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 73f7f52846f625..b0115796228de8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23094,7 +23094,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                  SelectionDAG &DAG,
                                  SmallVectorImpl<SDNode *> &Created) const {
   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
-  if (isIntDivCheap(N->getValueType(0), Attr))
+  if (isIntDivCheap(N->getValueType(0), true, Attr))
     return SDValue(N,0); // Lower SDIV as SDIV
 
   assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
@@ -60048,7 +60048,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
   return Res;
 }
 
-bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
+bool X86TargetLowering::isIntDivCheap(EVT VT, bool IsSigned,
+                                      AttributeList Attr) const {
   // Integer division on x86 is expensive. However, when aggressively optimizing
   // for code size, we prefer to use a div instruction, as it is usually smaller
   // than the alternative sequence.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 0ab42f032c3ea6..3ea93982427b3f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1606,7 +1606,8 @@ namespace llvm {
         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
         unsigned &NumIntermediates, MVT &RegisterVT) const override;
 
-    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
+    bool isIntDivCheap(EVT VT, bool IsSigned,
+                       AttributeList Attr) const override;
 
     bool supportSwiftError() const override;
 
diff --git a/llvm/test/CodeGen/BPF/sdiv_error.ll b/llvm/test/CodeGen/BPF/sdiv_error.ll
index a5dcc6271224b0..05394f3e05de89 100644
--- a/llvm/test/CodeGen/BPF/sdiv_error.ll
+++ b/llvm/test/CodeGen/BPF/sdiv_error.ll
@@ -2,7 +2,7 @@
 ; RUN: FileCheck %s < %t1
 ; CHECK: unsupported signed division
 
-define i32 @test(i32 %len) {
-  %1 = sdiv i32 %len, 15
-  ret i32 %1
+define i64 @test(i64 %len) {
+  %1 = sdiv i64 %len, 15
+  ret i64 %1
 }
diff --git a/llvm/test/CodeGen/BPF/sdiv_to_mul.ll b/llvm/test/CodeGen/BPF/sdiv_to_mul.ll
new file mode 100644
index 00000000000000..56d068e4bd4373
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/sdiv_to_mul.ll
@@ -0,0 +1,80 @@
+; RUN: llc -O2 -march=bpfel -mcpu=v1 < %s | FileCheck --check-prefix=CHECK-V1 %s
+; RUN: llc -O2 -march=bpfel -mcpu=v3 < %s | FileCheck --check-prefix=CHECK-V3 %s
+; RUN: llc -O2 -march=bpfel -mcpu=v4 < %s | FileCheck --check-prefix=CHECK-V4 %s
+;
+; Source:
+;   struct S {
+;     int var[3];
+;   };
+;   int foo1 (struct S *a, struct S *b)
+;   {
+;     return a - b;
+;   }
+;   int foo2(int a)
+;   {
+;     return a/15;
+;   }
+;   int foo3(int a)
+;   {
+;     return a%15;
+;   }
+
+target triple = "bpf"
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @foo1(ptr noundef %a, ptr noundef %b) local_unnamed_addr #0 {
+entry:
+  %sub.ptr.lhs.cast = ptrtoint ptr %a to i64
+  %sub.ptr.rhs.cast = ptrtoint ptr %b to i64
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 12
+  %conv = trunc i64 %sub.ptr.div to i32
+  ret i32 %conv
+}
+; CHECK-V1:        r0 = r1
+; CHECK-V1:        r0 -= r2
+; CHECK-V1:        r0 s>>= 2
+; CHECK-V1:        r1 = -6148914691236517205 ll
+; CHECK-V1:        r0 *= r1
+; CHECK-V1:        exit
+
+; CHECK-V3:        r0 = r1
+; CHECK-V3:        r0 -= r2
+; CHECK-V3:        r0 >>= 2
+; CHECK-V3:        w0 *= -1431655765
+; CHECK-V3:        exit
+
+; CHECK-V4:        r0 = r1
+; CHECK-V4:        r0 -= r2
+; CHECK-V4:        r0 s/= 12
+; CHECK-V4:        exit
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local noundef range(i32 -143165576, 143165577) i32 @foo2(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+  %div = sdiv i32 %a, 15
+  ret i32 %div
+}
+; CHECK-V1-NOT:   r[[#]] s/= 15
+; CHECK-V3-NOT:   w[[#]] s/= 15
+; CHECK-V4:       w[[#]] s/= 15
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local noundef range(i32 -14, 15) i32 @foo3(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+  %rem = srem i32 %a, 15
+  ret i32 %rem
+}
+
+; CHECK-V1-NOT:   r[[#]] s%= 15
+; CHECK-V3-NOT:   w[[#]] s%= 15
+; CHECK-V4:       w[[#]] s%= 15
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 20.0.0git (git at github.com:yonghong-song/llvm-project.git 238f3f994a96c511134ca1bc11d2d03e4368a0c1)"}
diff --git a/llvm/test/CodeGen/BPF/srem_error.ll b/llvm/test/CodeGen/BPF/srem_error.ll
index 883e72d32dc7ed..8245a47b1ddd21 100644
--- a/llvm/test/CodeGen/BPF/srem_error.ll
+++ b/llvm/test/CodeGen/BPF/srem_error.ll
@@ -2,7 +2,7 @@
 ; RUN: FileCheck %s < %t1
 ; CHECK: unsupported signed division
 
-define i32 @test(i32 %len) {
-  %1 = srem i32 %len, 15
-  ret i32 %1
+define i64 @test(i64 %len) {
+  %1 = srem i64 %len, 15
+  ret i64 %1
 }



More information about the llvm-commits mailing list