[llvm] 62cd07f - [InstCombine] Canonicalize `sub mask, X -> ~X` when high bits are ignored (#110635)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 21:48:08 PDT 2024
Author: Yingwei Zheng
Date: 2024-10-02T12:48:06+08:00
New Revision: 62cd07fb67c946d251a18d3a564b535553939c9a
URL: https://github.com/llvm/llvm-project/commit/62cd07fb67c946d251a18d3a564b535553939c9a
DIFF: https://github.com/llvm/llvm-project/commit/62cd07fb67c946d251a18d3a564b535553939c9a.diff
LOG: [InstCombine] Canonicalize `sub mask, X -> ~X` when high bits are ignored (#110635)
Alive2: https://alive2.llvm.org/ce/z/NJgBPL
The motivating case for this patch is to emit `andn` on RISC-V with Zbb for expressions like `(sub 63, X) & 63`.
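For intuition: when M is a low-bit mask, i.e. M = 2^k - 1, the subtraction M - X never borrows out of the low k bits, so each masked result bit is 1 - X_i = ~X_i, and therefore (M - X) & M == ~X & M. A minimal standalone C++ check of that identity (not from the patch; the mask and iteration count are chosen arbitrarily):

#include <cassert>
#include <cstdint>

int main() {
  // For a low-bit mask M = 2^k - 1, subtracting from all-ones low bits
  // never borrows, so each masked bit of M - x is the complement of the
  // corresponding bit of x. Bits above the mask are discarded, matching
  // the "high bits are ignored" demanded-bits condition.
  const uint64_t M = 63; // 2^6 - 1, the mask from the motivating case
  for (uint64_t x = 0; x < (1u << 20); ++x)
    assert(((M - x) & M) == (~x & M));
  return 0;
}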
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
llvm/test/Transforms/InstCombine/sub.ll
llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0ad178594be03e..ee6b60f7f70d68 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -592,6 +592,15 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
     if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero))
       return I->getOperand(1);
 
+    // Canonicalize sub mask, X -> ~X
+    const APInt *LHSC;
+    if (match(I->getOperand(0), m_LowBitMask(LHSC)) &&
+        DemandedFromOps.isSubsetOf(*LHSC)) {
+      IRBuilderBase::InsertPointGuard Guard(Builder);
+      Builder.SetInsertPoint(I);
+      return Builder.CreateNot(I->getOperand(1));
+    }
+
     // Otherwise just compute the known bits of the result.
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index ec88984c49cca6..ba596e10e8b3fe 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -2797,3 +2797,71 @@ if.then:
 if.else:
   ret i32 0
 }
+
+define i32 @fold_sub_and_into_andn(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 63
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP0]], 63
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 63
+  ret i32 %and
+}
+
+define i1 @fold_sub_and_into_andn_icmp(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_icmp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 63
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 63
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
+
+define i32 @fold_sub_and_into_andn_subset(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_subset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP0]], 31
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 31
+  ret i32 %and
+}
+
+; Negative tests
+
+define i32 @fold_sub_and_into_andn_nonmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @fold_sub_and_into_andn_nonmask(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], 63
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 %y, %x
+  %and = and i32 %sub, 63
+  ret i32 %and
+}
+
+define i32 @fold_sub_and_into_andn_superset(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_superset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 63, [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], 127
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 127
+  ret i32 %and
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
index 7ee9812b3e74cc..841096d226f756 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
@@ -17,13 +17,13 @@ define noundef i64 @foo(i64 noundef %0) {
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
 ; SSE-LABEL: @foo(
-; SSE-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
-; SSE-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
+; SSE-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP0:%.*]], -1
+; SSE-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 44
 ; SSE-NEXT:    ret i64 [[TMP3]]
 ;
 ; AVX-LABEL: @foo(
-; AVX-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
-; AVX-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
+; AVX-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP0:%.*]], -1
+; AVX-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 44
 ; AVX-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = sub i64 1048575, %0
@@ -34,35 +34,35 @@ define noundef i64 @foo(i64 noundef %0) {
 define void @bar(ptr noundef %0) {
 ; SSE-LABEL: @bar(
 ; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8
-; SSE-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP4:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
+; SSE-NEXT:    [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; SSE-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP8:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
+; SSE-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
-; SSE-NEXT:    [[TMP11:%.*]] = shl <2 x i64> [[TMP10]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP12:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP11]]
+; SSE-NEXT:    [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8
 ; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
 ; SSE-NEXT:    [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
-; SSE-NEXT:    [[TMP15:%.*]] = shl <2 x i64> [[TMP14]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP16:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP15]]
+; SSE-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bar(
 ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8
-; AVX-NEXT:    [[TMP3:%.*]] = shl <4 x i64> [[TMP2]], <i64 44, i64 44, i64 44, i64 44>
-; AVX-NEXT:    [[TMP4:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
+; AVX-NEXT:    [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; AVX-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
-; AVX-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP6]], <i64 44, i64 44, i64 44, i64 44>
-; AVX-NEXT:    [[TMP8:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
+; AVX-NEXT:    [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; AVX-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; AVX-NEXT:    ret void
 ;
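To summarize the SimplifyDemandedUseBits change in isolation, here is a simplified scalar model of the new condition; the helper names are invented for illustration, and plain uint64_t stands in for LLVM's APInt and PatternMatch machinery:

#include <cstdint>

// Mirrors m_LowBitMask: C has the form 2^k - 1 (all-ones in the low bits).
static bool isLowBitMask(uint64_t C) { return C != 0 && (C & (C + 1)) == 0; }

// Mirrors DemandedFromOps.isSubsetOf(*LHSC): `sub C, X` can be rewritten
// to `~X` when every demanded bit lies inside the low-bit mask C, because
// the subtraction cannot borrow across those bit positions.
static bool canRewriteSubToNot(uint64_t C, uint64_t Demanded) {
  return isLowBitMask(C) && (Demanded & ~C) == 0;
}

int main() {
  // Matches the positive tests above: mask 63 covers demanded sets 63 and 31.
  bool a = canRewriteSubToNot(63, 63);  // true
  bool b = canRewriteSubToNot(63, 31);  // true
  // Matches the superset negative test: demanded 127 exceeds mask 63.
  bool c = canRewriteSubToNot(63, 127); // false
  return (a && b && !c) ? 0 : 1;
}

In the motivating pattern, the surrounding `and X, 63` demands only the low six bits, so the `sub` collapses to `~X`; after InstCombine's usual `and (not X), C` -> `xor (and X, C), C` canonicalization this is exactly what the new sub.ll CHECK lines show, and per the commit message RISC-V with Zbb can then select `andn`.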
More information about the llvm-commits mailing list