[llvm] [LLVM] Improve the DemandedBits Analysis (PR #148853)

Panagiotis K via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 16 00:40:57 PDT 2025


https://github.com/karouzakisp updated https://github.com/llvm/llvm-project/pull/148853

>From 78db1799107b2b3043b063150fd78626ba2fb2ea Mon Sep 17 00:00:00 2001
From: Panagiotis Karouzakis <karouzakispan at gmail.com>
Date: Tue, 15 Jul 2025 18:01:29 +0300
Subject: [PATCH 1/6] [LLVM] DemandedBits: Propagate demanded bits through
 div/rem ops

---
 llvm/lib/Analysis/DemandedBits.cpp         |  54 +++++
 llvm/test/Analysis/DemandedBits/div_rem.ll | 266 +++++++++++++++++++++
 2 files changed, 320 insertions(+)
 create mode 100644 llvm/test/Analysis/DemandedBits/div_rem.ll

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 6694d5cc06c8c..7327bc4706ffc 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstdint>
@@ -246,6 +247,59 @@ void DemandedBits::determineLiveOperandBits(
     else
       AB &= ~(Known.One & ~Known2.One);
     break;
+  case Instruction::SRem:
+  case Instruction::URem:
+  case Instruction::UDiv:
+  case Instruction::SDiv: {
+    auto Opc = UserI->getOpcode();
+    auto IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
+    bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
+    if (OperandNo == 0) {
+      const APInt *DivAmnt;
+      if (match(UserI->getOperand(1), m_APInt(DivAmnt))) {
+        uint64_t D = DivAmnt->getZExtValue();
+        if (isPowerOf2_64(D)) {
+          unsigned Sh = Log2_64(D);
+          if (IsDiv) {
+            AB = AOut.shl(Sh);
+          } else {
+            AB = AOut & APInt::getLowBitsSet(BitWidth, Sh);
+          }
+        } else if (IsDiv) { // Non power of 2 constant div
+          //   x =  q * C + r;
+          //   q = x / C;
+          //   We think of it like grade school division in base 2.
+          //
+          //    x = [   unused   |  window m-bits |  ...  | needed bits ]
+          //                         ^ each step emits 1 quotient bit
+          //                         |
+          //                         |
+          //   C fits in m = ⌈log₂ C⌉ bits
+          //   Each new quotient bit consumes the window of m low bits and
+          //   shifts one position left.
+
+          //   To produce the first LowQ quotient/rem bits we slide the window
+          //   LowQ times --> need at most LowQ + m low bits of the dividend.
+          //   Need = LowQ + Ceil(log2(C))             (+1 sign bit for
+          //   sdiv/srem). For example : Assume x = b7 b6 b5 b4 b3 b2 b1 b0.
+          //   LowQ = 4, C = 5 and ceil(log_2(C)) = 3.
+          //   step 0: b2 b1 b0, produces quotient q[0].
+          //   step 1: b3 b2 b1, produces quotient q[1].
+          //   step 2: b4 b3 b2, produces quotient q[2].
+          //   step 3: b5 b4 b3, produces quotient q[3].
+          //   k = LowQ - 1;
+          //   TopIndex = k + m-1 = 3 + 2 = 5;
+          //   The dividend bits b5...b0 are enough we don't care for b6 and b7.
+          unsigned LowQ = AOut.getActiveBits();
+          unsigned Need = LowQ + Log2_64_Ceil(D);
+          if (IsSigned)
+            Need++;
+          AB = APInt::getLowBitsSet(BitWidth, std::min(BitWidth, Need));
+        }
+      }
+    }
+    break;
+  }
   case Instruction::Xor:
   case Instruction::PHI:
     AB = AOut;
diff --git a/llvm/test/Analysis/DemandedBits/div_rem.ll b/llvm/test/Analysis/DemandedBits/div_rem.ll
new file mode 100644
index 0000000000000..c27e88f321ac4
--- /dev/null
+++ b/llvm/test/Analysis/DemandedBits/div_rem.ll
@@ -0,0 +1,266 @@
+; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
+
+define i8 @test_sdiv_const_amount_4(i32 %a) {
+; CHECK-LABEL: 'test_sdiv_const_amount_4'
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for   %div = sdiv i32 %a, 4
+; CHECK-DAG: DemandedBits: 0x3fc for %a in   %div = sdiv i32 %a, 4
+; CHECK-DAG: DemandedBits: 0xffffffff for 4 in   %div = sdiv i32 %a, 4
+;
+  %div = sdiv i32 %a, 4
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_sdiv_const_amount_5(i32 %a) {
+; CHECK-LABEL: 'test_sdiv_const_amount_5'
+; CHECK-DAG: DemandedBits: 0xff for   %div = sdiv i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xfff for %a in   %div = sdiv i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xffffffff for 5 in   %div = sdiv i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+;
+  %div = sdiv i32 %a, 5
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_sdiv_const_amount_8(i32 %a) {
+; CHECK-LABEL: 'test_sdiv_const_amount_8'
+; CHECK-DAG: DemandedBits: 0xff for   %div = sdiv i32 %a, 8
+; CHECK-DAG: DemandedBits: 0x7f8 for %a in   %div = sdiv i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %div = sdiv i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+;
+  %div = sdiv i32 %a, 8
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_sdiv_const_amount_9(i32 %a) {
+; CHECK-LABEL: 'test_sdiv_const_amount_9'
+; CHECK-DAG: DemandedBits: 0xff for   %div = sdiv i32 %a, 9
+; CHECK-DAG: DemandedBits: 0x1fff for %a in   %div = sdiv i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xffffffff for 9 in   %div = sdiv i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+; sdiv: 8 quotient bits + ceil(log2(9)) = 4 + 1 sign bit => low 13 bits of %a.
+  %div = sdiv i32 %a, 9
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_sdiv(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_sdiv'
+; CHECK-DAG: DemandedBits: 0xff for   %div = sdiv i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %div = sdiv i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %div = sdiv i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+;
+  %div = sdiv i32 %a, %b
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_udiv_const_amount_4(i32 %a) {
+; CHECK-LABEL: 'test_udiv_const_amount_4'
+; CHECK-DAG: DemandedBits: 0xff for   %div = udiv i32 %a, 4
+; CHECK-DAG: DemandedBits: 0x3fc for %a in   %div = udiv i32 %a, 4
+; CHECK-DAG: DemandedBits: 0xffffffff for 4 in   %div = udiv i32 %a, 4
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+;
+  %div = udiv i32 %a, 4
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_udiv_const_amount_5(i32 %a) {
+; CHECK-LABEL: 'test_udiv_const_amount_5'
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for   %div = udiv i32 %a, 5
+; CHECK-DAG: DemandedBits: 0x7ff for %a in   %div = udiv i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xffffffff for 5 in   %div = udiv i32 %a, 5
+;
+  %div = udiv i32 %a, 5
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_udiv_const_amount_8(i32 %a) {
+; CHECK-LABEL: 'test_udiv_const_amount_8'
+; CHECK-DAG: DemandedBits: 0xff for   %div = udiv i32 %a, 8
+; CHECK-DAG: DemandedBits: 0x7f8 for %a in   %div = udiv i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %div = udiv i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+;
+  %div = udiv i32 %a, 8
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_udiv_const_amount_9(i32 %a) {
+; CHECK-LABEL: 'test_udiv_const_amount_9'
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for   %div = udiv i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xfff for %a in   %div = udiv i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xffffffff for 9 in   %div = udiv i32 %a, 9
+;
+  %div = udiv i32 %a, 9
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_udiv(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_udiv'
+; CHECK-DAG: DemandedBits: 0xff for   %div = udiv i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %div = udiv i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %div = udiv i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
+;
+  %div = udiv i32 %a, %b
+  %div.t = trunc i32 %div to i8
+  ret i8 %div.t
+}
+
+define i8 @test_srem_const_amount_4(i32 %a) {
+; CHECK-LABEL: 'test_srem_const_amount_4'
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, 4
+; CHECK-DAG: DemandedBits: 0x3 for %a in   %rem = srem i32 %a, 4
+; CHECK-DAG: DemandedBits: 0xffffffff for 4 in   %rem = srem i32 %a, 4
+;
+  %rem = srem i32 %a, 4
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_srem_const_amount_5(i32 %a) {
+; CHECK-LABEL: 'test_srem_const_amount_5'
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %rem = srem i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xffffffff for 5 in   %rem = srem i32 %a, 5
+;
+  %rem = srem i32 %a, 5
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_srem_const_amount_8(i32 %a) {
+; CHECK-LABEL: 'test_srem_const_amount_8'
+; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, 8
+; CHECK-DAG: DemandedBits: 0x7 for %a in   %rem = srem i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %rem = srem i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+;
+  %rem = srem i32 %a, 8
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_srem_const_amount_9(i32 %a) {
+; CHECK-LABEL: 'test_srem_const_amount_9'
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %rem = srem i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xffffffff for 9 in   %rem = srem i32 %a, 9
+;
+  %rem = srem i32 %a, 9
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_srem(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_srem'
+; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %rem = srem i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %rem = srem i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+;
+  %rem = srem i32 %a, %b
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_urem_const_amount_4(i32 %a) {
+; CHECK-LABEL: 'test_urem_const_amount_4'
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for   %rem = urem i32 %a, 4
+; CHECK-DAG: DemandedBits: 0x3 for %a in   %rem = urem i32 %a, 4
+; CHECK-DAG: DemandedBits: 0xffffffff for 4 in   %rem = urem i32 %a, 4
+;
+  %rem = urem i32 %a, 4
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+
+
+
+
+
+define i8 @test_urem_const_amount_5(i32 %a) {
+; CHECK-LABEL: 'test_urem_const_amount_5'
+; CHECK-DAG: DemandedBits: 0xff for   %rem = urem i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %rem = urem i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xffffffff for 5 in   %rem = urem i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+;
+  %rem = urem i32 %a, 5
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_urem_const_amount_8(i32 %a) {
+; CHECK-LABEL: 'test_urem_const_amount_8'
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for   %rem = urem i32 %a, 8
+; CHECK-DAG: DemandedBits: 0x7 for %a in   %rem = urem i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %rem = urem i32 %a, 8
+;
+  %rem = urem i32 %a, 8
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_urem_const_amount_9(i32 %a) {
+; CHECK-LABEL: 'test_urem_const_amount_9'
+; CHECK-DAG: DemandedBits: 0xff for   %rem = urem i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %rem = urem i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xffffffff for 9 in   %rem = urem i32 %a, 9
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+;
+  %rem = urem i32 %a, 9
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
+define i8 @test_urem(i32 %a, i32 %b) {
+; CHECK-LABEL: 'test_urem'
+; CHECK-DAG: DemandedBits: 0xff for   %rem = urem i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %rem = urem i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %rem = urem i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+;
+  %rem = urem i32 %a, %b
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}

>From 6d9b2710df83bf206c6019c6e30baff84391b4fd Mon Sep 17 00:00:00 2001
From: Panagiotis Karouzakis <karouzakispan at gmail.com>
Date: Wed, 16 Jul 2025 00:13:28 +0300
Subject: [PATCH 2/6] [LLVM] Use APInt's API to be safe with div/rems wider
 than 64 bits

---
 llvm/lib/Analysis/DemandedBits.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 7327bc4706ffc..260076b01470c 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -258,8 +258,8 @@ void DemandedBits::determineLiveOperandBits(
       const APInt *DivAmnt;
       if (match(UserI->getOperand(1), m_APInt(DivAmnt))) {
         uint64_t D = DivAmnt->getZExtValue();
-        if (isPowerOf2_64(D)) {
-          unsigned Sh = Log2_64(D);
+        if (DivAmnt->isPowerOf2()) {
+          unsigned Sh = DivAmnt->countr_zero();
           if (IsDiv) {
             AB = AOut.shl(Sh);
           } else {
@@ -274,7 +274,7 @@ void DemandedBits::determineLiveOperandBits(
           //                         ^ each step emits 1 quotient bit
           //                         |
           //                         |
-          //   C fits in m = ⌈log₂ C⌉ bits
+          //   C fits in m = ⌈log2 C⌉ bits
           //   Each new quotient bit consumes the window of m low bits and
           //   shifts one position left.
 
@@ -291,7 +291,7 @@ void DemandedBits::determineLiveOperandBits(
           //   TopIndex = k + m-1 = 3 + 2 = 5;
           //   The dividend bits b5...b0 are enough we don't care for b6 and b7.
           unsigned LowQ = AOut.getActiveBits();
-          unsigned Need = LowQ + Log2_64_Ceil(D);
+          unsigned Need = LowQ + DivAmnt->ceilLogBase2();
           if (IsSigned)
             Need++;
           AB = APInt::getLowBitsSet(BitWidth, std::min(BitWidth, Need));

>From d2df5277b6b26820653438c3afe93ff3bfcb9326 Mon Sep 17 00:00:00 2001
From: Panagiotis Karouzakis <karouzakispan at gmail.com>
Date: Wed, 16 Jul 2025 06:39:27 +0300
Subject: [PATCH 3/6] minor updates and added new test that was supposed to be
 failing

---
 llvm/lib/Analysis/DemandedBits.cpp         |  6 ++----
 llvm/test/Analysis/DemandedBits/div_rem.ll | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 260076b01470c..1672d5aedccc9 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -36,7 +36,6 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstdint>
@@ -257,7 +256,6 @@ void DemandedBits::determineLiveOperandBits(
     if (OperandNo == 0) {
       const APInt *DivAmnt;
       if (match(UserI->getOperand(1), m_APInt(DivAmnt))) {
-        uint64_t D = DivAmnt->getZExtValue();
         if (DivAmnt->isPowerOf2()) {
           unsigned Sh = DivAmnt->countr_zero();
           if (IsDiv) {
@@ -278,10 +276,10 @@ void DemandedBits::determineLiveOperandBits(
           //   Each new quotient bit consumes the window of m low bits and
           //   shifts one position left.
 
-          //   To produce the first LowQ quotient/rem bits we slide the window
+          //   To produce the first LowQ quotient bits we slide the window
           //   LowQ times --> need at most LowQ + m low bits of the dividend.
           //   Need = LowQ + Ceil(log2(C))             (+1 sign bit for
-          //   sdiv/srem). For example : Assume x = b7 b6 b5 b4 b3 b2 b1 b0.
+          //   sdiv). For example : Assume x = b7 b6 b5 b4 b3 b2 b1 b0.
           //   LowQ = 4, C = 5 and ceil(log_2(C)) = 3.
           //   step 0: b2 b1 b0, produces quotient q[0].
           //   step 1: b3 b2 b1, produces quotient q[1].
diff --git a/llvm/test/Analysis/DemandedBits/div_rem.ll b/llvm/test/Analysis/DemandedBits/div_rem.ll
index c27e88f321ac4..3040c305a215e 100644
--- a/llvm/test/Analysis/DemandedBits/div_rem.ll
+++ b/llvm/test/Analysis/DemandedBits/div_rem.ll
@@ -130,6 +130,22 @@ define i8 @test_udiv(i32 %a, i32 %b) {
   ret i8 %div.t
 }
 
+define i8 @test_srem_zext_trunc_const_amount2(i8 %a) {
+; CHECK-LABEL: 'test_srem_const_amount_4'
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %ext, 2
+; CHECK-DAG: DemandedBits: 0xffffffff for %ext in   %rem = srem i32 %ext, 2
+; CHECK-DAG: DemandedBits: 0xffffffff for 2 in   %rem = srem i32 %ext, 2
+; CHECK-DAG: DemandedBits: 0xffffffff for   %ext = sext i8 %a to i32
+; CHECK-DAG: DemandedBits: 0xff for %a in   %ext = sext i8 %a to i32
+;
+  %ext = sext i8 %a to i32
+  %rem = srem i32 %ext, 2
+  %rem.t = trunc i32 %rem to i8
+  ret i8 %rem.t
+}
+
 define i8 @test_srem_const_amount_4(i32 %a) {
 ; CHECK-LABEL: 'test_srem_const_amount_4'
 ; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8

>From e1c0bde2dcac121785ef63c57b923c9ffe3b67e7 Mon Sep 17 00:00:00 2001
From: Panagiotis Karouzakis <karouzakispan at gmail.com>
Date: Wed, 16 Jul 2025 06:43:34 +0300
Subject: [PATCH 4/6] fixed Check-Label

---
 llvm/test/Analysis/DemandedBits/div_rem.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Analysis/DemandedBits/div_rem.ll b/llvm/test/Analysis/DemandedBits/div_rem.ll
index 3040c305a215e..e4d76b370b0dd 100644
--- a/llvm/test/Analysis/DemandedBits/div_rem.ll
+++ b/llvm/test/Analysis/DemandedBits/div_rem.ll
@@ -131,7 +131,7 @@ define i8 @test_udiv(i32 %a, i32 %b) {
 }
 
 define i8 @test_srem_zext_trunc_const_amount2(i8 %a) {
-; CHECK-LABEL: 'test_srem_const_amount_4'
+; CHECK-LABEL: 'test_srem_zext_trunc_const_amount2'
 ; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %ext, 2

>From 179eb2228e8cc356b1ae42027b71216467f3b5f0 Mon Sep 17 00:00:00 2001
From: Panagiotis Karouzakis <karouzakispan at gmail.com>
Date: Wed, 16 Jul 2025 07:51:30 +0300
Subject: [PATCH 5/6] Fix srem: we need to preserve the sign bit, otherwise
 we might lose it

---
 llvm/lib/Analysis/DemandedBits.cpp         |  3 +++
 llvm/test/Analysis/DemandedBits/div_rem.ll | 22 +++++++++++-----------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 1672d5aedccc9..d9e02fe108a67 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -262,6 +262,9 @@ void DemandedBits::determineLiveOperandBits(
             AB = AOut.shl(Sh);
           } else {
             AB = AOut & APInt::getLowBitsSet(BitWidth, Sh);
+            if (IsSigned) {
+              AB.setSignBit();
+            }
           }
         } else if (IsDiv) { // Non power of 2 constant div
           //   x =  q * C + r;
diff --git a/llvm/test/Analysis/DemandedBits/div_rem.ll b/llvm/test/Analysis/DemandedBits/div_rem.ll
index e4d76b370b0dd..1f8c958f20380 100644
--- a/llvm/test/Analysis/DemandedBits/div_rem.ll
+++ b/llvm/test/Analysis/DemandedBits/div_rem.ll
@@ -93,11 +93,11 @@ define i8 @test_udiv_const_amount_5(i32 %a) {
 
 define i8 @test_udiv_const_amount_8(i32 %a) {
 ; CHECK-LABEL: 'test_udiv_const_amount_8'
+; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
+; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
 ; CHECK-DAG: DemandedBits: 0xff for   %div = udiv i32 %a, 8
 ; CHECK-DAG: DemandedBits: 0x7f8 for %a in   %div = udiv i32 %a, 8
 ; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %div = udiv i32 %a, 8
-; CHECK-DAG: DemandedBits: 0xff for   %div.t = trunc i32 %div to i8
-; CHECK-DAG: DemandedBits: 0xff for %div in   %div.t = trunc i32 %div to i8
 ;
   %div = udiv i32 %a, 8
   %div.t = trunc i32 %div to i8
@@ -135,10 +135,10 @@ define i8 @test_srem_zext_trunc_const_amount2(i8 %a) {
 ; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %ext, 2
-; CHECK-DAG: DemandedBits: 0xffffffff for %ext in   %rem = srem i32 %ext, 2
+; CHECK-DAG: DemandedBits: 0x80000001 for %ext in   %rem = srem i32 %ext, 2
 ; CHECK-DAG: DemandedBits: 0xffffffff for 2 in   %rem = srem i32 %ext, 2
-; CHECK-DAG: DemandedBits: 0xffffffff for   %ext = sext i8 %a to i32
-; CHECK-DAG: DemandedBits: 0xff for %a in   %ext = sext i8 %a to i32
+; CHECK-DAG: DemandedBits: 0x80000001 for   %ext = sext i8 %a to i32
+; CHECK-DAG: DemandedBits: 0x81 for %a in   %ext = sext i8 %a to i32
 ;
   %ext = sext i8 %a to i32
   %rem = srem i32 %ext, 2
@@ -151,7 +151,7 @@ define i8 @test_srem_const_amount_4(i32 %a) {
 ; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, 4
-; CHECK-DAG: DemandedBits: 0x3 for %a in   %rem = srem i32 %a, 4
+; CHECK-DAG: DemandedBits: 0x80000003 for %a in   %rem = srem i32 %a, 4
 ; CHECK-DAG: DemandedBits: 0xffffffff for 4 in   %rem = srem i32 %a, 4
 ;
   %rem = srem i32 %a, 4
@@ -174,11 +174,11 @@ define i8 @test_srem_const_amount_5(i32 %a) {
 
 define i8 @test_srem_const_amount_8(i32 %a) {
 ; CHECK-LABEL: 'test_srem_const_amount_8'
-; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, 8
-; CHECK-DAG: DemandedBits: 0x7 for %a in   %rem = srem i32 %a, 8
-; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %rem = srem i32 %a, 8
 ; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
+; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, 8
+; CHECK-DAG: DemandedBits: 0x80000007 for %a in   %rem = srem i32 %a, 8
+; CHECK-DAG: DemandedBits: 0xffffffff for 8 in   %rem = srem i32 %a, 8
 ;
   %rem = srem i32 %a, 8
   %rem.t = trunc i32 %rem to i8
@@ -200,11 +200,11 @@ define i8 @test_srem_const_amount_9(i32 %a) {
 
 define i8 @test_srem(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'test_srem'
+; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xff for   %rem = srem i32 %a, %b
+; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
 ; CHECK-DAG: DemandedBits: 0xffffffff for %a in   %rem = srem i32 %a, %b
 ; CHECK-DAG: DemandedBits: 0xffffffff for %b in   %rem = srem i32 %a, %b
-; CHECK-DAG: DemandedBits: 0xff for   %rem.t = trunc i32 %rem to i8
-; CHECK-DAG: DemandedBits: 0xff for %rem in   %rem.t = trunc i32 %rem to i8
 ;
   %rem = srem i32 %a, %b
   %rem.t = trunc i32 %rem to i8

>From 216f44bcb2003466384d62ef9eaa85a1e59fdb9c Mon Sep 17 00:00:00 2001
From: Panagiotis Karouzakis <karouzakispan at gmail.com>
Date: Wed, 16 Jul 2025 10:40:37 +0300
Subject: [PATCH 6/6] removed risky demanded bits propagation with division

---
 llvm/lib/Analysis/DemandedBits.cpp | 30 ------------------------------
 1 file changed, 30 deletions(-)

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index d9e02fe108a67..c8e9e8be9791d 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -266,36 +266,6 @@ void DemandedBits::determineLiveOperandBits(
               AB.setSignBit();
             }
           }
-        } else if (IsDiv) { // Non power of 2 constant div
-          //   x =  q * C + r;
-          //   q = x / C;
-          //   We think of it like grade school division in base 2.
-          //
-          //    x = [   unused   |  window m-bits |  ...  | needed bits ]
-          //                         ^ each step emits 1 quotient bit
-          //                         |
-          //                         |
-          //   C fits in m = ⌈log2 C⌉ bits
-          //   Each new quotient bit consumes the window of m low bits and
-          //   shifts one position left.
-
-          //   To produce the first LowQ quotient bits we slide the window
-          //   LowQ times --> need at most LowQ + m low bits of the dividend.
-          //   Need = LowQ + Ceil(log2(C))             (+1 sign bit for
-          //   sdiv). For example : Assume x = b7 b6 b5 b4 b3 b2 b1 b0.
-          //   LowQ = 4, C = 5 and ceil(log_2(C)) = 3.
-          //   step 0: b2 b1 b0, produces quotient q[0].
-          //   step 1: b3 b2 b1, produces quotient q[1].
-          //   step 2: b4 b3 b2, produces quotient q[2].
-          //   step 3: b5 b4 b3, produces quotient q[3].
-          //   k = LowQ - 1;
-          //   TopIndex = k + m-1 = 3 + 2 = 5;
-          //   The dividend bits b5...b0 are enough we don't care for b6 and b7.
-          unsigned LowQ = AOut.getActiveBits();
-          unsigned Need = LowQ + DivAmnt->ceilLogBase2();
-          if (IsSigned)
-            Need++;
-          AB = APInt::getLowBitsSet(BitWidth, std::min(BitWidth, Need));
         }
       }
     }



More information about the llvm-commits mailing list