[llvm] [WIP] Fold (bitwiseop X, (add (not Y), Z)) -> (bitwiseop X, (not (sub Y, Z))). (PR #141476)
Xu Zhang via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 04:07:28 PDT 2025
https://github.com/simonzgx updated https://github.com/llvm/llvm-project/pull/141476
>From b45efac9c20695f1ce4d2bf8901e1ad27faf3c9c Mon Sep 17 00:00:00 2001
From: Xu Zhang <simonzgx at gmail.com>
Date: Mon, 26 May 2025 19:23:49 +0800
Subject: [PATCH 1/4] [DAG] Fold (bitwiseop X, (add (not Y), Z)) -> (bitwiseop X, (not (sub Y, Z))).
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index efaa8bd4a7950..87f1b51016954 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7528,6 +7528,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
+ // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
+ if (sd_match(N, m_And(m_Value(X), m_Add(m_Value(NotY), m_Value(Z)))) &&
+ sd_match(NotY, m_Not(m_Value(Y))) &&
+ (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
+
// Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
// If we are shifting down an extended sign bit, see if we can simplify
// this to shifting the MSB directly to expose further simplifications.
>From d014f093b30301cd037e870988e2de266609fc5e Mon Sep 17 00:00:00 2001
From: Xu Zhang <simonzgx at gmail.com>
Date: Thu, 29 May 2025 00:59:17 +0800
Subject: [PATCH 2/4] Fix failed UTs and resolve comments.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 48 +++++++++++++++++--
.../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 10 ++--
llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll | 33 +++++++------
3 files changed, 65 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 87f1b51016954..5685c7ac65ee8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -396,6 +396,8 @@ namespace {
bool PromoteLoad(SDValue Op);
SDValue foldShiftToAvg(SDNode *N);
+ // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
+ SDValue foldBitwiseOpWithNeg(SDNode *N);
SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -7529,11 +7531,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
// Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
- if (sd_match(N, m_And(m_Value(X), m_Add(m_Value(NotY), m_Value(Z)))) &&
- sd_match(NotY, m_Not(m_Value(Y))) &&
- (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
- return DAG.getNode(ISD::AND, DL, VT, X,
- DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
+ // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
+ if (SDValue Folded = foldBitwiseOpWithNeg(N))
+ return Folded;
// Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
// If we are shifting down an extended sign bit, see if we can simplify
@@ -8212,6 +8212,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
+ // Fold (or X, (add (not Y), Z)) -> (or X, (not (sub Y, Z)))
+ // Fold (or X, (sub (not Y), Z)) -> (or X, (not (add Y, Z)))
+ if (SDValue Folded = foldBitwiseOpWithNeg(N))
+ return Folded;
+
// fold (or x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -9863,6 +9868,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
N0.getOperand(1));
}
+ // Fold (xor X, (add (not Y), Z)) -> (xor X, (not (sub Y, Z)))
+ // Fold (xor X, (sub (not Y), Z)) -> (xor X, (not (add Y, Z)))
+ if (SDValue Folded = foldBitwiseOpWithNeg(N))
+ return Folded;
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0Opcode == N1.getOpcode())
@@ -11616,6 +11625,35 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
}
+SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N) {
+ if (!TLI.hasAndNot(SDValue(N, 0)))
+ return SDValue();
+
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR)
+ return SDValue();
+
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+ SDLoc DL(N);
+ SDValue X, Y, Z, NotY;
+
+ if (sd_match(
+ N, m_c_BinOp(Opc, m_Value(X), m_Add(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
+ m_Value(Z)))))
+ return DAG.getNode(Opc, DL, VT, X,
+ DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
+
+ if (sd_match(N, m_c_BinOp(Opc, m_Value(X),
+ m_Sub(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
+ m_Value(Z)))) &&
+ NotY->hasOneUse())
+ return DAG.getNode(Opc, DL, VT, X,
+ DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
+
+ return SDValue();
+}
+
/// Generate Min/Max node
SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True,
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index e564d7bddea6f..55f04635a33ba 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -885,9 +885,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
define i8 @test_not_cttz_i8(i8 %a) nounwind {
; LA32R-LABEL: test_not_cttz_i8:
; LA32R: # %bb.0:
-; LA32R-NEXT: nor $a1, $a0, $zero
-; LA32R-NEXT: addi.w $a1, $a1, -1
-; LA32R-NEXT: and $a0, $a0, $a1
+; LA32R-NEXT: addi.w $a1, $a0, 1
+; LA32R-NEXT: andn $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: andi $a1, $a1, 85
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -921,9 +920,8 @@ define i8 @test_not_cttz_i8(i8 %a) nounwind {
define i16 @test_not_cttz_i16(i16 %a) nounwind {
; LA32R-LABEL: test_not_cttz_i16:
; LA32R: # %bb.0:
-; LA32R-NEXT: nor $a1, $a0, $zero
-; LA32R-NEXT: addi.w $a1, $a1, -1
-; LA32R-NEXT: and $a0, $a0, $a1
+; LA32R-NEXT: addi.w $a1, $a0, 1
+; LA32R-NEXT: andn $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: lu12i.w $a2, 5
; LA32R-NEXT: ori $a2, $a2, 1365
diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 5dcb1d63207d1..4715593de701c 100644
--- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -73,17 +73,20 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: xorps %xmm3, %xmm3
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
+; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
; X86-NEXT: calll *%esi
; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X86-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
-; X86-NEXT: pxor %xmm1, %xmm1
-; X86-NEXT: psubd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
+; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
+; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X86-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -108,10 +111,8 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: retq
; X64-NEXT: LBB0_3: ## %forbody
; X64-NEXT: pushq %rbx
-; X64-NEXT: subq $64, %rsp
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; X64-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
+; X64-NEXT: subq $48, %rsp
+; X64-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: cvttps2dq %xmm1, %xmm0
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
@@ -162,17 +163,19 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT: psubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
-; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT: orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; X64-NEXT: xorps %xmm3, %xmm3
-; X64-NEXT: xorps %xmm4, %xmm4
+; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT: por %xmm1, %xmm0
+; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT: xorps %xmm3, %xmm3
+; X64-NEXT: xorps %xmm4, %xmm4
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
; X64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload
>From 8b7ac353d474b08782a6c929be5c50aa9dd53cdf Mon Sep 17 00:00:00 2001
From: Xu Zhang <simonzgx at gmail.com>
Date: Thu, 29 May 2025 17:20:07 +0800
Subject: [PATCH 3/4] Add new test cases and reformat code.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +-
.../AArch64/aarch64-bitwisenot-fold.ll | 316 ++++++++++++++++++
llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll | 8 +-
3 files changed, 325 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5685c7ac65ee8..4fa3da1164889 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11638,15 +11638,15 @@ SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N) {
SDLoc DL(N);
SDValue X, Y, Z, NotY;
- if (sd_match(
- N, m_c_BinOp(Opc, m_Value(X), m_Add(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
- m_Value(Z)))))
+ if (sd_match(N, m_c_BinOp(Opc, m_Value(X),
+ m_Add(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
+ m_Value(Z)))))
return DAG.getNode(Opc, DL, VT, X,
DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
if (sd_match(N, m_c_BinOp(Opc, m_Value(X),
- m_Sub(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
- m_Value(Z)))) &&
+ m_Sub(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
+ m_Value(Z)))) &&
NotY->hasOneUse())
return DAG.getNode(Opc, DL, VT, X,
DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
new file mode 100644
index 0000000000000..29283e57b511b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -0,0 +1,316 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-linux | FileCheck %s
+
+define i8 @andnot_add_with_neg_i8(i8 %0, i8 %1) {
+; CHECK-LABEL: andnot_add_with_neg_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i8 %0, -1
+ %4 = add i8 %3, %1
+ %5 = and i8 %4, %0
+ ret i8 %5
+}
+
+define i8 @andnot_sub_with_neg_i8(i8 %0, i8 %1) {
+; CHECK-LABEL: andnot_sub_with_neg_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i8 %0, -1
+ %4 = sub i8 %3, %1
+ %5 = and i8 %4, %0
+ ret i8 %5
+}
+
+define i8 @xornot_add_with_neg_i8(i8 %0, i8 %1) {
+; CHECK-LABEL: xornot_add_with_neg_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: eon w0, w8, w0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i8 %0, -1
+ %4 = add i8 %3, %1
+ %5 = xor i8 %4, %0
+ ret i8 %5
+}
+
+define i8 @xornot_sub_with_neg_i8(i8 %0, i8 %1) {
+; CHECK-LABEL: xornot_sub_with_neg_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: eon w0, w8, w0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i8 %0, -1
+ %4 = sub i8 %3, %1
+ %5 = xor i8 %4, %0
+ ret i8 %5
+}
+
+define i8 @ornot_add_with_neg_i8(i8 %0, i8 %1) {
+; CHECK-LABEL: ornot_add_with_neg_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: orn w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i8 %0, -1
+ %4 = add i8 %3, %1
+ %5 = or i8 %4, %0
+ ret i8 %5
+}
+
+define i8 @ornot_sub_with_neg_i8(i8 %0, i8 %1) {
+; CHECK-LABEL: ornot_sub_with_neg_i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: orn w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i8 %0, -1
+ %4 = sub i8 %3, %1
+ %5 = or i8 %4, %0
+ ret i8 %5
+}
+
+
+define i16 @andnot_add_with_neg_i16(i16 %0, i16 %1) {
+; CHECK-LABEL: andnot_add_with_neg_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i16 %0, -1
+ %4 = add i16 %3, %1
+ %5 = and i16 %4, %0
+ ret i16 %5
+}
+
+define i16 @andnot_sub_with_neg_i16(i16 %0, i16 %1) {
+; CHECK-LABEL: andnot_sub_with_neg_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i16 %0, -1
+ %4 = sub i16 %3, %1
+ %5 = and i16 %4, %0
+ ret i16 %5
+}
+
+define i16 @xornot_add_with_neg_i16(i16 %0, i16 %1) {
+; CHECK-LABEL: xornot_add_with_neg_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: eon w0, w8, w0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i16 %0, -1
+ %4 = add i16 %3, %1
+ %5 = xor i16 %4, %0
+ ret i16 %5
+}
+
+define i16 @xornot_sub_with_neg_i16(i16 %0, i16 %1) {
+; CHECK-LABEL: xornot_sub_with_neg_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: eon w0, w8, w0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i16 %0, -1
+ %4 = sub i16 %3, %1
+ %5 = xor i16 %4, %0
+ ret i16 %5
+}
+
+define i16 @ornot_add_with_neg_i16(i16 %0, i16 %1) {
+; CHECK-LABEL: ornot_add_with_neg_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: orn w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i16 %0, -1
+ %4 = add i16 %3, %1
+ %5 = or i16 %4, %0
+ ret i16 %5
+}
+
+define i16 @ornot_sub_with_neg_i16(i16 %0, i16 %1) {
+; CHECK-LABEL: ornot_sub_with_neg_i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: orn w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i16 %0, -1
+ %4 = sub i16 %3, %1
+ %5 = or i16 %4, %0
+ ret i16 %5
+}
+
+define i32 @andnot_add_with_neg_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: andnot_add_with_neg_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i32 %0, -1
+ %4 = add i32 %3, %1
+ %5 = and i32 %4, %0
+ ret i32 %5
+}
+
+define i32 @andnot_sub_with_neg_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: andnot_sub_with_neg_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i32 %0, -1
+ %4 = sub i32 %3, %1
+ %5 = and i32 %4, %0
+ ret i32 %5
+}
+
+define i32 @xornot_add_with_neg_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: xornot_add_with_neg_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: eon w0, w8, w0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i32 %0, -1
+ %4 = add i32 %3, %1
+ %5 = xor i32 %4, %0
+ ret i32 %5
+}
+
+define i32 @xornot_sub_with_neg_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: xornot_sub_with_neg_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: eon w0, w8, w0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i32 %0, -1
+ %4 = sub i32 %3, %1
+ %5 = xor i32 %4, %0
+ ret i32 %5
+}
+
+define i32 @ornot_add_with_neg_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: ornot_add_with_neg_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: orn w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i32 %0, -1
+ %4 = add i32 %3, %1
+ %5 = or i32 %4, %0
+ ret i32 %5
+}
+
+define i32 @ornot_sub_with_neg_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: ornot_sub_with_neg_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: orn w0, w0, w8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i32 %0, -1
+ %4 = sub i32 %3, %1
+ %5 = or i32 %4, %0
+ ret i32 %5
+}
+
+
+define i64 @andnot_add_with_neg_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: andnot_add_with_neg_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub x8, x0, x1
+; CHECK-NEXT: bic x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i64 %0, -1
+ %4 = add i64 %3, %1
+ %5 = and i64 %4, %0
+ ret i64 %5
+}
+
+define i64 @andnot_sub_with_neg_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: andnot_sub_with_neg_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: bic x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i64 %0, -1
+ %4 = sub i64 %3, %1
+ %5 = and i64 %4, %0
+ ret i64 %5
+}
+
+define i64 @xornot_add_with_neg_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: xornot_add_with_neg_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub x8, x0, x1
+; CHECK-NEXT: eon x0, x8, x0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i64 %0, -1
+ %4 = add i64 %3, %1
+ %5 = xor i64 %4, %0
+ ret i64 %5
+}
+
+define i64 @xornot_sub_with_neg_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: xornot_sub_with_neg_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: eon x0, x8, x0
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i64 %0, -1
+ %4 = sub i64 %3, %1
+ %5 = xor i64 %4, %0
+ ret i64 %5
+}
+
+define i64 @ornot_add_with_neg_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: ornot_add_with_neg_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub x8, x0, x1
+; CHECK-NEXT: orn x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i64 %0, -1
+ %4 = add i64 %3, %1
+ %5 = or i64 %4, %0
+ ret i64 %5
+}
+
+define i64 @ornot_sub_with_neg_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: ornot_sub_with_neg_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: orn x0, x0, x8
+; CHECK-NEXT: ret
+entry:
+ %3 = xor i64 %0, -1
+ %4 = sub i64 %3, %1
+ %5 = or i64 %4, %0
+ ret i64 %5
+}
diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 4715593de701c..e0d5c788b0c69 100644
--- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -112,7 +112,7 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: LBB0_3: ## %forbody
; X64-NEXT: pushq %rbx
; X64-NEXT: subq $48, %rsp
-; X64-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: cvttps2dq %xmm1, %xmm0
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
@@ -172,10 +172,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT: por %xmm1, %xmm0
+; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; X64-NEXT: xorps %xmm3, %xmm3
-; X64-NEXT: xorps %xmm4, %xmm4
+; X64-NEXT: xorps %xmm3, %xmm3
+; X64-NEXT: xorps %xmm4, %xmm4
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
; X64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload
>From 132ffaa57606e54ddbf09088893b2bcb2d59baa2 Mon Sep 17 00:00:00 2001
From: Xu Zhang <simonzgx at gmail.com>
Date: Thu, 29 May 2025 18:33:34 +0800
Subject: [PATCH 4/4] Fix comments.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 29 +++++++------------
.../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 8 ++---
2 files changed, 15 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4fa3da1164889..8010e340ab2b6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -397,7 +397,7 @@ namespace {
SDValue foldShiftToAvg(SDNode *N);
// Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
- SDValue foldBitwiseOpWithNeg(SDNode *N);
+ SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -7532,7 +7532,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
// Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
- if (SDValue Folded = foldBitwiseOpWithNeg(N))
+ if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
return Folded;
// Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
@@ -8214,7 +8214,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// Fold (or X, (add (not Y), Z)) -> (or X, (not (sub Y, Z)))
// Fold (or X, (sub (not Y), Z)) -> (or X, (not (add Y, Z)))
- if (SDValue Folded = foldBitwiseOpWithNeg(N))
+ if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
return Folded;
// fold (or x, 0) -> x
@@ -9870,7 +9870,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// Fold (xor X, (add (not Y), Z)) -> (xor X, (not (sub Y, Z)))
// Fold (xor X, (sub (not Y), Z)) -> (xor X, (not (add Y, Z)))
- if (SDValue Folded = foldBitwiseOpWithNeg(N))
+ if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
return Folded;
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
@@ -11625,28 +11625,21 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
}
-SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N) {
+SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
if (!TLI.hasAndNot(SDValue(N, 0)))
return SDValue();
unsigned Opc = N->getOpcode();
- if (Opc != ISD::AND && Opc != ISD::OR && Opc != ISD::XOR)
- return SDValue();
-
- SDValue N1 = N->getOperand(1);
- EVT VT = N1.getValueType();
- SDLoc DL(N);
SDValue X, Y, Z, NotY;
-
- if (sd_match(N, m_c_BinOp(Opc, m_Value(X),
- m_Add(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
- m_Value(Z)))))
+ if (sd_match(N, m_BitwiseLogic(m_Value(X), m_Add(m_AllOf(m_Value(NotY),
+ m_Not(m_Value(Y))),
+ m_Value(Z)))))
return DAG.getNode(Opc, DL, VT, X,
DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
- if (sd_match(N, m_c_BinOp(Opc, m_Value(X),
- m_Sub(m_AllOf(m_Value(NotY), m_Not(m_Value(Y))),
- m_Value(Z)))) &&
+ if (sd_match(N, m_BitwiseLogic(m_Value(X), m_Sub(m_AllOf(m_Value(NotY),
+ m_Not(m_Value(Y))),
+ m_Value(Z)))) &&
NotY->hasOneUse())
return DAG.getNode(Opc, DL, VT, X,
DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 55f04635a33ba..27be02c50f1c7 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -885,8 +885,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
define i8 @test_not_cttz_i8(i8 %a) nounwind {
; LA32R-LABEL: test_not_cttz_i8:
; LA32R: # %bb.0:
-; LA32R-NEXT: addi.w $a1, $a0, 1
-; LA32R-NEXT: andn $a0, $a0, $a1
+; LA32R-NEXT: addi.w $a1, $a0, 1
+; LA32R-NEXT: andn $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: andi $a1, $a1, 85
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -920,8 +920,8 @@ define i8 @test_not_cttz_i8(i8 %a) nounwind {
define i16 @test_not_cttz_i16(i16 %a) nounwind {
; LA32R-LABEL: test_not_cttz_i16:
; LA32R: # %bb.0:
-; LA32R-NEXT: addi.w $a1, $a0, 1
-; LA32R-NEXT: andn $a0, $a0, $a1
+; LA32R-NEXT: addi.w $a1, $a0, 1
+; LA32R-NEXT: andn $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: lu12i.w $a2, 5
; LA32R-NEXT: ori $a2, $a2, 1365
More information about the llvm-commits mailing list