[llvm] 3014422 - [llvm] [InstCombine] fold "icmp eq (X + (V - 1)) & -V, X" to "icmp eq (and X, V - 1), 0" (#152851)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 14 00:23:06 PDT 2025
Author: Pavel Skripkin
Date: 2025-08-14T10:23:03+03:00
New Revision: 30144226a4ea916341c37759d93dc2b32502efa1
URL: https://github.com/llvm/llvm-project/commit/30144226a4ea916341c37759d93dc2b32502efa1
DIFF: https://github.com/llvm/llvm-project/commit/30144226a4ea916341c37759d93dc2b32502efa1.diff
LOG: [llvm] [InstCombine] fold "icmp eq (X + (V - 1)) & -V, X" to "icmp eq (and X, V - 1), 0" (#152851)
This fold optimizes
```llvm
define i1 @src(i32 %num, i32 %val) {
  %mask = add i32 %val, -1
  %neg = sub nsw i32 0, %val
  %num.biased = add i32 %num, %mask
  %_2.sroa.0.0 = and i32 %num.biased, %neg
  %_0 = icmp eq i32 %_2.sroa.0.0, %num
  ret i1 %_0
}
```
to
```llvm
define i1 @tgt(i32 %num, i32 %val) {
  %mask = add i32 %val, -1
  %tmp = and i32 %num, %mask
  %ret = icmp eq i32 %tmp, 0
  ret i1 %ret
}
```
for power-of-two `val`.
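As an informal complement to the Alive2 proof below, the identity can be brute-forced over a small integer type. The standalone program below is a sketch, not part of the commit; it checks that the matched pattern and the folded form agree for every `u8` value of `x` and every power-of-two `v` (wrapping arithmetic matches the flag-free `add` in the IR above):
```rust
// Exhaustively compare the matched pattern against the folded form over u8.
// Illustrative only; the actual proof is the Alive2 link below.
fn main() {
    for shift in 0..8u32 {
        let v: u8 = 1 << shift; // power-of-two "alignment"
        let mask = v - 1; // low-bit mask, e.g. 0b0111 for v = 0b1000
        for x in 0..=u8::MAX {
            // Original pattern: icmp eq (x + mask) & -v, x
            let src = (x.wrapping_add(mask) & v.wrapping_neg()) == x;
            // Folded pattern: icmp eq (x & mask), 0
            let tgt = (x & mask) == 0;
            assert_eq!(src, tgt, "mismatch for x={x}, v={v}");
        }
    }
    println!("fold is exact for all u8 inputs and power-of-two v");
}
```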
Observed in real life in the following Rust code
```rust
pub fn is_aligned(num: usize) -> bool {
    num.next_multiple_of(1 << 12) == num
}
```
which verifies that `num` is aligned to 4096.
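For reference, a hand-rolled source-level version of the same check (a hypothetical sketch, not part of the commit; the name `is_aligned_manual` is invented) makes the add/and/icmp sequence that the matcher sees visible before the fold:
```rust
/// Round `num` up to the next multiple of 4096 and compare against the
/// original value; this is roughly what `next_multiple_of(1 << 12)` lowers to.
pub fn is_aligned_manual(num: usize) -> bool {
    const ALIGN: usize = 1 << 12; // 4096, a power of two
    (num.wrapping_add(ALIGN - 1) & ALIGN.wrapping_neg()) == num
}
```
After this patch, InstCombine reduces this pattern to a single `and` plus a compare against zero.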
Alive2 proof: https://alive2.llvm.org/ce/z/QisECm
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/lib/Transforms/InstCombine/InstCombineInternal.h
llvm/test/Transforms/InstCombine/icmp-add.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index cf94d28100488..a64f422c3eede 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1320,6 +1320,35 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
return nullptr;
}
+/// Fold icmp eq (num + mask) & ~mask, num
+/// to
+/// icmp eq (and num, mask), 0
+/// Where mask is a low bit mask.
+Instruction *InstCombinerImpl::foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp) {
+ Value *Num;
+ CmpPredicate Pred;
+ const APInt *Mask, *Neg;
+
+ if (!match(&Cmp,
+ m_c_ICmp(Pred, m_Value(Num),
+ m_OneUse(m_c_And(m_OneUse(m_c_Add(m_Deferred(Num),
+ m_LowBitMask(Mask))),
+ m_APInt(Neg))))))
+ return nullptr;
+
+ if (*Neg != ~*Mask)
+ return nullptr;
+
+ if (!ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ // Create new icmp eq (num & mask), 0
+ auto *NewAnd = Builder.CreateAnd(Num, *Mask);
+ auto *Zero = Constant::getNullValue(Num->getType());
+
+ return new ICmpInst(Pred, NewAnd, Zero);
+}
+
/// Fold icmp Pred X, C.
/// TODO: This code structure does not make sense. The saturating add fold
/// should be moved to some other helper and extended as noted below (it is also
@@ -7644,6 +7673,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
+ if (Instruction *Res = foldIsMultipleOfAPowerOfTwo(I))
+ return Res;
+
// Test if the ICmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index c67e27e5b3e7c..2340028ce93dc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -721,6 +721,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
+ Instruction *foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp);
Instruction *foldICmpUsingBoolRange(ICmpInst &I);
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll
index 1a41c1f3e1045..cb428097f2ae1 100644
--- a/llvm/test/Transforms/InstCombine/icmp-add.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -3300,3 +3300,149 @@ entry:
%cmp = icmp ult i32 %add, 253
ret i1 %cmp
}
+
+; PR 152851
+
+define i1 @val_is_aligend_const_pow2(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4095
+ %num.masked = and i32 %num.biased, -4096
+ %_0 = icmp eq i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_const_pow2_add_commute(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_pow2_add_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 4095, %num
+ %num.masked = and i32 %num.biased, -4096
+ %_0 = icmp eq i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_const_pow2_and_commute(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_pow2_and_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4095
+ %num.masked = and i32 -4096, %num.biased
+ %_0 = icmp eq i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_const_pow2_icm_commute(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_pow2_icm_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4095
+ %num.masked = and i32 %num.biased, -4096
+ %_0 = icmp eq i32 %num, %num.masked
+ ret i1 %_0
+}
+
+; Should not work for non-power-of-two cases
+define i1 @val_is_aligend_const_non_pow2(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_non_pow2(
+; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 6
+; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -7
+; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 6
+ %num.masked = and i32 %num.biased, -7
+ %_0 = icmp eq i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_const_pow2_multiuse(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_pow2_multiuse(
+; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4096
+; CHECK-NEXT: call void @use(i32 [[NUM_MASKED]])
+; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4095
+ %num.masked = and i32 %num.biased, -4096
+ call void @use(i32 %num.masked)
+ %_0 = icmp eq i32 %num.masked, %num
+ ret i1 %_0
+}
+
+; Applies since the number of instructions does not change
+define i1 @val_is_aligend_const_pow2_multiuse1(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_pow2_multiuse1(
+; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: call void @use(i32 [[NUM_BIASED]])
+; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4096
+; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4095
+ call void @use(i32 %num.biased)
+ %num.masked = and i32 %num.biased, -4096
+ %_0 = icmp eq i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_const_pow2_ne(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_pow2_ne(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4095
+ %num.masked = and i32 %num.biased, -4096
+ %_0 = icmp ne i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_const_mismatch(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_mismatch(
+; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
+; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4095
+; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[NUM_MASKED]], [[NUM]]
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4095
+ %num.masked = and i32 %num.biased, -4095
+ %_0 = icmp ne i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_const_mismatch1(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_const_mismatch1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], -4096
+; CHECK-NEXT: [[NUM_MASKED:%.*]] = add i32 [[TMP1]], 4096
+; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[NUM_MASKED]], [[NUM]]
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4096
+ %num.masked = and i32 %num.biased, -4096
+ %_0 = icmp ne i32 %num.masked, %num
+ ret i1 %_0
+}
+
+define i1 @val_is_aligend_pred_mismatch(i32 %num) {
+; CHECK-LABEL: @val_is_aligend_pred_mismatch(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], -4096
+; CHECK-NEXT: [[NUM_MASKED:%.*]] = add i32 [[TMP1]], 4096
+; CHECK-NEXT: [[_0:%.*]] = icmp sge i32 [[NUM_MASKED]], [[NUM]]
+; CHECK-NEXT: ret i1 [[_0]]
+;
+ %num.biased = add i32 %num, 4096
+ %num.masked = and i32 %num.biased, -4096
+ %_0 = icmp sge i32 %num.masked, %num
+ ret i1 %_0
+}