[llvm] [InstCombine] Canonicalize `(sub nuw Mask, X)` -> `(xor Mask, X)` (PR #122542)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 14:47:59 PST 2025
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/122542
https://alive2.llvm.org/ce/z/zGwUBp
This came up when trying to implment:
`(ctlz Pow2)` -> `(sub/xor BW - 1, Log2(Pow2))`
https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1944/files#diff-b13a246c0599d6b2255fae8707bb4e36bf28221ff67f7ad54d169f7bd5ba4e22R13996
>From f99290e58586c3dcbf8980c6c03090c6400e663a Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Fri, 10 Jan 2025 16:44:00 -0600
Subject: [PATCH] [InstCombine] Canonicalize `(sub nuw Mask, X)` -> `(xor Mask,
X)`
https://alive2.llvm.org/ce/z/zGwUBp
This came up when trying to implment:
`(ctlz Pow2)` -> `(sub/xor BW - 1, Log2(Pow2))`
https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1944/files#diff-b13a246c0599d6b2255fae8707bb4e36bf28221ff67f7ad54d169f7bd5ba4e22R13996
---
.../Transforms/InstCombine/InstCombiner.h | 5 +++++
.../InstCombine/InstCombineAddSub.cpp | 6 ++++++
.../InstCombine/InstCombineCompares.cpp | 20 +++++++++----------
llvm/test/Transforms/InstCombine/sub-xor.ll | 2 +-
llvm/test/Transforms/InstCombine/sub.ll | 6 +++---
.../vec_demanded_elts-inseltpoison.ll | 2 +-
.../InstCombine/vec_demanded_elts.ll | 2 +-
.../Transforms/PhaseOrdering/X86/pr88239.ll | 13 ++++++------
.../PhaseOrdering/scev-custom-dl.ll | 8 ++++----
9 files changed, 38 insertions(+), 26 deletions(-)
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index fa6b60cba15aaf..1dae2c82dae2e4 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -236,6 +236,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
return isFreeToInvert(V, WillInvertAllUses, Unused);
}
+ /// If `Not` is true, returns true if V is a negative power of 2 or zero.
+ /// If `Not` is false, returns true if V is a Mask or zero.
+ bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
+ unsigned Depth = 0);
+
/// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
/// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
/// NOTE: for Instructions only!
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 73876d00e73a7c..d91437e8aec9e1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2780,6 +2780,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
return Res;
+ // Canonicalize (sub nuw Mask, X) -> (xor Mask, X)
+ if (I.hasNoUnsignedWrap() &&
+ isMaskOrZero(Op0, /*Not=*/false,
+ getSimplifyQuery().getWithInstruction(&I)))
+ return BinaryOperator::CreateXor(Op0, Op1);
+
return TryToNarrowDeduceFlags();
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2e457257599493..bc3db5b065df42 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4326,8 +4326,8 @@ Instruction *InstCombinerImpl::foldSelectICmp(CmpPredicate Pred, SelectInst *SI,
}
// Returns whether V is a Mask ((X + 1) & X == 0) or ~Mask (-Pow2OrZero)
-static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
- unsigned Depth = 0) {
+bool InstCombiner::isMaskOrZero(const Value *V, bool Not,
+ const SimplifyQuery &Q, unsigned Depth) {
if (Not ? match(V, m_NegatedPower2OrZero()) : match(V, m_LowBitMaskOrZero()))
return true;
if (V->getType()->getScalarSizeInBits() == 1)
@@ -4381,14 +4381,14 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
case Instruction::Add:
// Pow2 - 1 is a Mask.
if (!Not && match(I->getOperand(1), m_AllOnes()))
- return isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
- Depth, Q.AC, Q.CxtI, Q.DT);
+ return ::isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
+ Depth, Q.AC, Q.CxtI, Q.DT);
break;
case Instruction::Sub:
// -Pow2 is a ~Mask.
if (Not && match(I->getOperand(0), m_Zero()))
- return isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
- Depth, Q.AC, Q.CxtI, Q.DT);
+ return ::isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
+ Depth, Q.AC, Q.CxtI, Q.DT);
break;
case Instruction::Call: {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
@@ -4497,13 +4497,13 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
if (match(Op0, m_c_And(m_Specific(Op1), m_Value(M)))) {
X = Op1;
// Look for: x & Mask pred x
- if (isMaskOrZero(M, /*Not=*/false, Q)) {
+ if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
return !ICmpInst::isSigned(Pred) ||
(match(M, m_NonNegative()) || isKnownNonNegative(M, Q));
}
// Look for: x & ~Mask pred ~Mask
- if (isMaskOrZero(X, /*Not=*/true, Q)) {
+ if (IC.isMaskOrZero(X, /*Not=*/true, Q)) {
return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, Q);
}
return false;
@@ -4513,7 +4513,7 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
auto Check = [&]() {
// Look for: ~x | Mask == -1
- if (isMaskOrZero(M, /*Not=*/false, Q)) {
+ if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
if (Value *NotX =
IC.getFreelyInverted(X, X->hasOneUse(), &IC.Builder)) {
X = NotX;
@@ -4531,7 +4531,7 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
match(Op0, m_OneUse(m_And(m_Value(X), m_Value(M))))) {
auto Check = [&]() {
// Look for: x & ~Mask == 0
- if (isMaskOrZero(M, /*Not=*/true, Q)) {
+ if (IC.isMaskOrZero(M, /*Not=*/true, Q)) {
if (Value *NotM =
IC.getFreelyInverted(M, M->hasOneUse(), &IC.Builder)) {
M = NotM;
diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll
index a4135e0b514532..180de6d2f88282 100644
--- a/llvm/test/Transforms/InstCombine/sub-xor.ll
+++ b/llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -166,7 +166,7 @@ define i32 @xor_dominating_cond(i32 %x) {
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X:%.*]], 256
; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
-; CHECK-NEXT: [[A:%.*]] = sub nuw nsw i32 255, [[X]]
+; CHECK-NEXT: [[A:%.*]] = xor i32 [[X]], 255
; CHECK-NEXT: ret i32 [[A]]
; CHECK: if.end:
; CHECK-NEXT: ret i32 [[X]]
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index e89419d1f3838a..ebda52a1354953 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -2204,9 +2204,9 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) {
define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) {
; CHECK-LABEL: @shrink_sub_from_constant_lowbits3(
-; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 4)
-; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 24), [[X0000]]
-; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], splat (i8 3)
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 1)
+; CHECK-NEXT: [[SUB:%.*]] = and <2 x i8> [[TMP1]], splat (i8 30)
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[SUB]], splat (i8 3)
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%x0000 = shl <2 x i8> %x, <i8 4, i8 4> ; 4 low bits are known zero
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
index a240dfe7d271a9..8d0e340b77a10c 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
@@ -215,7 +215,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {
define <3 x i8> @shuf_sub(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sub(
-; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> <i8 1, i8 poison, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 poison, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> <i32 0, i32 poison, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index ee7ef9955e643f..f9f7017f1bd70d 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -218,7 +218,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {
define <3 x i8> @shuf_sub(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sub(
-; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> <i8 1, i8 poison, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 poison, i8 3>
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> <i32 0, i32 poison, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
index b3625094f07ea1..a6c66ff40d8ac6 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
@@ -8,18 +8,19 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr noalias nocapture noundef readonly [[TMP0:%.*]], ptr noalias nocapture noundef writeonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28
; CHECK-NEXT: br label [[TMP4:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[TMP2]] ], [ [[VEC_IND_NEXT:%.*]], [[TMP4]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[VEC_IND]], splat (i64 4294967295)
+; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP6]], splat (i64 255)
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison)
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5)
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4
+; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[TMP4]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
index 4a19940b6f874f..181669165220e7 100644
--- a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
+++ b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
@@ -141,14 +141,14 @@ define i32 @test_loop_idiom_recogize(i32 %x, i32 %y, ptr %lam, ptr %alp) nounwin
; CHECK-NEXT: Classifying expressions for: @test_loop_idiom_recogize
; CHECK-NEXT: %indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb1> U: [0,256) S: [0,256) Exits: 255 LoopDispositions: { %bb1: Computable }
-; CHECK-NEXT: %i.0.reg2mem.0 = sub nuw nsw i32 255, %indvar
-; CHECK-NEXT: --> {255,+,-1}<nsw><%bb1> U: [0,256) S: [0,256) Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT: %i.0.reg2mem.0 = xor i32 %indvar, 255
+; CHECK-NEXT: --> %i.0.reg2mem.0 U: [0,-2147483648) S: [0,-2147483648) Exits: 0 LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %0 = getelementptr i32, ptr %alp, i32 %i.0.reg2mem.0
-; CHECK-NEXT: --> {(1020 + %alp),+,-4}<nw><%bb1> U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT: --> ((4 * %i.0.reg2mem.0) + %alp) U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %1 = load i32, ptr %0, align 4
; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %2 = getelementptr i32, ptr %lam, i32 %i.0.reg2mem.0
-; CHECK-NEXT: --> {(1020 + %lam),+,-4}<nw><%bb1> U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT: --> ((4 * %i.0.reg2mem.0) + %lam) U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Variant }
; CHECK-NEXT: %indvar.next = add nuw nsw i32 %indvar, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb1> U: [1,257) S: [1,257) Exits: 256 LoopDispositions: { %bb1: Computable }
; CHECK-NEXT: %tmp10 = mul i32 %x, 255
More information about the llvm-commits
mailing list