[llvm] 7770b0a - [KnownBits] Improve `KnownBits::rem(X, Y)` in cases where we can deduce low-bits of output
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Sun May 7 17:12:24 PDT 2023
Author: Noah Goldstein
Date: 2023-05-07T19:11:53-05:00
New Revision: 7770b0abfdab60e11fe26010c616ceb9b106a9ef
URL: https://github.com/llvm/llvm-project/commit/7770b0abfdab60e11fe26010c616ceb9b106a9ef
DIFF: https://github.com/llvm/llvm-project/commit/7770b0abfdab60e11fe26010c616ceb9b106a9ef.diff
LOG: [KnownBits] Improve `KnownBits::rem(X, Y)` in cases where we can deduce low-bits of output
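The low `cttz(Y)` bits of `X` are passed through unchanged to the result: `Y` is a multiple of `2^cttz(Y)`, so `rem(X, Y)` is congruent to `X` modulo that power of two.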
Alive2 Links:
https://alive2.llvm.org/ce/z/Qc47p7
https://alive2.llvm.org/ce/z/19ut5H
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D149421
Added:
Modified:
llvm/include/llvm/Support/KnownBits.h
llvm/lib/Support/KnownBits.cpp
llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll
llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 26b62fb4e3828..6344a3cf90764 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -420,6 +420,11 @@ struct KnownBits {
void print(raw_ostream &OS) const;
void dump() const;
+
+private:
+ // Internal helper for getting the initial KnownBits for an `srem` or `urem`
+ // operation with the low-bits set.
+ static KnownBits remGetLowBits(const KnownBits &LHS, const KnownBits &RHS);
};
inline KnownBits operator&(KnownBits LHS, const KnownBits &RHS) {
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 8b0d6030fea32..e20838278e612 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -546,16 +546,27 @@ KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS) {
return Known;
}
-KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::remGetLowBits(const KnownBits &LHS, const KnownBits &RHS) {
unsigned BitWidth = LHS.getBitWidth();
+ if (!RHS.isZero() && RHS.Zero[0]) {
+ // rem X, Y where Y[0:N] is zero will preserve X[0:N] in the result.
+ unsigned RHSZeros = RHS.countMinTrailingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSZeros);
+ APInt OnesMask = LHS.One & Mask;
+ APInt ZerosMask = LHS.Zero & Mask;
+ return KnownBits(ZerosMask, OnesMask);
+ }
+ return KnownBits(BitWidth);
+}
+
+KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) {
assert(!LHS.hasConflict() && !RHS.hasConflict());
- KnownBits Known(BitWidth);
+ KnownBits Known = remGetLowBits(LHS, RHS);
if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) {
- // The upper bits are all zero, the lower ones are unchanged.
- APInt LowBits = RHS.getConstant() - 1;
- Known.Zero = LHS.Zero | ~LowBits;
- Known.One = LHS.One & LowBits;
+ // NB: Low bits set in `remGetLowBits`.
+ APInt HighBits = ~(RHS.getConstant() - 1);
+ Known.Zero |= HighBits;
return Known;
}
@@ -568,16 +579,12 @@ KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) {
}
KnownBits KnownBits::srem(const KnownBits &LHS, const KnownBits &RHS) {
- unsigned BitWidth = LHS.getBitWidth();
assert(!LHS.hasConflict() && !RHS.hasConflict());
- KnownBits Known(BitWidth);
+ KnownBits Known = remGetLowBits(LHS, RHS);
if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) {
- // The low bits of the first operand are unchanged by the srem.
+ // NB: Low bits are set in `remGetLowBits`.
APInt LowBits = RHS.getConstant() - 1;
- Known.Zero = LHS.Zero & LowBits;
- Known.One = LHS.One & LowBits;
-
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
if (LHS.isNonNegative() || LowBits.isSubsetOf(LHS.Zero))
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll b/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll
index 6b305bde89db0..0521c7130055f 100644
--- a/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll
+++ b/llvm/test/Analysis/ValueTracking/knownbits-rem-lowbits.ll
@@ -3,11 +3,7 @@
define i8 @urem_low_bits_know(i8 %xx, i8 %yy) {
; CHECK-LABEL: @urem_low_bits_know(
-; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 2
-; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 2
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 2
;
%x = or i8 %xx, 2
%y = and i8 %yy, -4
@@ -18,12 +14,7 @@ define i8 @urem_low_bits_know(i8 %xx, i8 %yy) {
define i8 @urem_low_bits_know2(i8 %xx, i8 %yy) {
; CHECK-LABEL: @urem_low_bits_know2(
-; CHECK-NEXT: [[XO:%.*]] = and i8 [[XX:%.*]], -4
-; CHECK-NEXT: [[X:%.*]] = or i8 [[XO]], 2
-; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4
-; CHECK-NEXT: [[REM:%.*]] = urem i8 [[X]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 3
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 2
;
%xo = or i8 %xx, 2
%x = and i8 %xo, 254
@@ -80,11 +71,7 @@ define i8 @urem_fail_low_bits_unknown2(i8 %xx, i8 %yy) {
define i8 @srem_low_bits_know(i8 %xx, i8 %yy) {
; CHECK-LABEL: @srem_low_bits_know(
-; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 10
-; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -4
-; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 2
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 2
;
%x = or i8 %xx, 10
%y = and i8 %yy, -4
@@ -95,11 +82,7 @@ define i8 @srem_low_bits_know(i8 %xx, i8 %yy) {
define i8 @srem_low_bits_know2(i8 %xx, i8 %yy) {
; CHECK-LABEL: @srem_low_bits_know2(
-; CHECK-NEXT: [[X:%.*]] = or i8 [[XX:%.*]], 1
-; CHECK-NEXT: [[Y:%.*]] = and i8 [[YY:%.*]], -2
-; CHECK-NEXT: [[REM:%.*]] = srem i8 [[X]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[REM]], 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 1
;
%x = or i8 %xx, 1
%y = and i8 %yy, -2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 1cb00e2f58385..904dcd4fed30e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -33,8 +33,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TFNONE: middle.block:
-; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
-; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFNONE: scalar.ph:
; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
@@ -193,8 +192,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; TFNONE: middle.block:
-; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
-; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFNONE: scalar.ph:
; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
@@ -395,8 +393,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; TFNONE: middle.block:
-; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
-; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFNONE: scalar.ph:
; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
@@ -607,8 +604,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; TFNONE: middle.block:
-; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
-; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFNONE: scalar.ph:
; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
@@ -666,8 +662,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; TFFALLBACK: middle.block:
-; TFFALLBACK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
-; TFFALLBACK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; TFFALLBACK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFFALLBACK: scalar.ph:
; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]]
@@ -731,8 +726,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; TFNONE: middle.block:
-; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]]
-; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; TFNONE: scalar.ph:
; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
More information about the llvm-commits mailing list