[llvm] 856ef96 - [RISCV] Optimize (and (icmp x, 0, neq), (icmp y, 0, neq)) utilizing zicond extension
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 22:48:37 PST 2025
Author: Ryan Buchner
Date: 2025-11-06T22:48:33-08:00
New Revision: 856ef9605b2307332911fe4f61be6014697bbcce
URL: https://github.com/llvm/llvm-project/commit/856ef9605b2307332911fe4f61be6014697bbcce
DIFF: https://github.com/llvm/llvm-project/commit/856ef9605b2307332911fe4f61be6014697bbcce.diff
LOG: [RISCV] Optimize (and (icmp x, 0, neq), (icmp y, 0, neq)) utilizing zicond extension
PR #166469
```
%1 = icmp x, 0, neq
%2 = icmp y, 0, neq
%3 = and %1, %2
```
Originally lowered to:
```
%1 = snez x
%2 = snez y
%3 = and %1, %2
```
With this optimization:
```
%1 = snez x
%3 = czero.eqz %1, y
```
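The rewrite is sound because `snez x` already produces a 0/1 value: ANDing it with `snez y` yields `snez x` when `y` is nonzero and 0 otherwise, which is exactly what `czero.eqz %1, y` computes, saving one instruction. As a rough illustration, C source along these lines (a hypothetical example, not part of the commit) produces the `(and (icmp ne), (icmp ne))` pattern the new combine targets:
```
// Hypothetical input: the '&&' of two nonzero tests is typically lowered
// to (and (setcc x, 0, ne), (setcc y, 0, ne)) in the SelectionDAG; with
// Zicond this can now be emitted as one snez plus one czero.eqz instead
// of two snez and an and.
int both_nonzero(long x, long y) {
  return (x != 0) && (y != 0);
}
```
This corresponds to the `icmp_and` test updated in zicond-opts.ll below.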
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/xaluo.ll
llvm/test/CodeGen/RISCV/zicond-opts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c1d38419992b1..1977d3372c5f6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16117,6 +16117,46 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}
+// (and (i1) f, (setcc c, 0, ne)) -> (czero.eqz f, c)
+// (and (i1) f, (setcc c, 0, eq)) -> (czero.nez f, c)
+// (and (setcc c, 0, ne), (i1) g) -> (czero.eqz g, c)
+// (and (setcc c, 0, eq), (i1) g) -> (czero.nez g, c)
+static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!Subtarget.hasCZEROLike())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ auto IsEqualCompZero = [](SDValue &V) -> bool {
+ if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
+ if (ISD::isIntEqualitySetCC(CC))
+ return true;
+ }
+ return false;
+ };
+
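+ // Canonicalize so that N0 is a single-use equality setcc against zero.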
+ if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+ std::swap(N0, N1);
+ if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+ return SDValue();
+
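+ // The other operand must be known to be 0 or 1; only then is ANDing it
+ // with the 0/1 setcc result equivalent to conditionally zeroing it.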
+ KnownBits Known = DAG.computeKnownBits(N1);
+ if (Known.getMaxValue().ugt(1))
+ return SDValue();
+
+ unsigned CzeroOpcode =
+ (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
+ ? RISCVISD::CZERO_EQZ
+ : RISCVISD::CZERO_NEZ;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
+}
+
static SDValue reduceANDOfAtomicLoad(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -16180,7 +16220,9 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
return V;
-
+ if (DCI.isAfterLegalizeDAG())
+ if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
+ return V;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index bf6802deeffdc..93b68b0a95b48 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1834,13 +1834,12 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV32ZICOND-NEXT: mul a5, a3, a0
; RV32ZICOND-NEXT: mul a6, a1, a2
; RV32ZICOND-NEXT: mulhu a7, a0, a2
-; RV32ZICOND-NEXT: snez t0, a3
+; RV32ZICOND-NEXT: add a5, a6, a5
+; RV32ZICOND-NEXT: snez a6, a3
; RV32ZICOND-NEXT: mulhu a3, a3, a0
-; RV32ZICOND-NEXT: mul t1, a0, a2
+; RV32ZICOND-NEXT: mul t0, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a5, a6, a5
-; RV32ZICOND-NEXT: and a1, a1, t0
+; RV32ZICOND-NEXT: czero.eqz a1, a6, a1
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: snez a2, a3
; RV32ZICOND-NEXT: add a5, a7, a5
@@ -1848,7 +1847,7 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV32ZICOND-NEXT: sltu a1, a5, a7
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: sw t1, 0(a4)
+; RV32ZICOND-NEXT: sw t0, 0(a4)
; RV32ZICOND-NEXT: sw a5, 4(a4)
; RV32ZICOND-NEXT: ret
;
@@ -3690,11 +3689,10 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: snez a6, a3
; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: snez a5, a1
-; RV32ZICOND-NEXT: and a5, a5, a6
-; RV32ZICOND-NEXT: mulhu a6, a1, a2
-; RV32ZICOND-NEXT: snez a6, a6
-; RV32ZICOND-NEXT: or a5, a5, a6
+; RV32ZICOND-NEXT: mulhu a5, a1, a2
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a1
+; RV32ZICOND-NEXT: snez a5, a5
+; RV32ZICOND-NEXT: or a5, a6, a5
; RV32ZICOND-NEXT: mulhu a6, a0, a2
; RV32ZICOND-NEXT: add a4, a6, a4
; RV32ZICOND-NEXT: sltu a4, a4, a6
@@ -3783,18 +3781,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
-; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: and a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: add a4, a6, a4
+; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
-; RV32ZICOND-NEXT: sltu a2, a4, a6
+; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: xori a0, a0, 1
@@ -5156,18 +5153,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
-; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: and a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: add a4, a6, a4
+; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
-; RV32ZICOND-NEXT: sltu a2, a4, a6
+; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: beqz a0, .LBB64_2
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index 305ab934e44a7..c6d72981eff32 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -7,22 +7,132 @@ define i32 @icmp_and(i64 %x, i64 %y) {
; RV32ZICOND-LABEL: icmp_and:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; Make sure we choose to replace the single-use icmp
+define i32 @icmp_and_x_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: czero.eqz a1, a0, a2
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: czero.eqz a1, a0, a1
+; RV64ZICOND-NEXT: add a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ %7 = zext i1 %4 to i32
+ %8 = add i32 %6, %7
+ ret i32 %8
+}
+
+; Make sure we choose to replace the single-use icmp
+define i32 @icmp_and_y_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: add a0, a0, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: add a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ %7 = zext i1 %3 to i32
+ %8 = add i32 %6, %7
+ ret i32 %8
+}
+
+; Both icmps have multiple uses; don't optimize
+define i32 @icmp_and_xy_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_xy_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: and a2, a0, a1
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: add a0, a2, a0
; RV32ZICOND-NEXT: ret
;
-; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND-LABEL: icmp_and_xy_multiple_uses:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: and a2, a0, a1
+; RV64ZICOND-NEXT: add a0, a1, a0
+; RV64ZICOND-NEXT: add a0, a2, a0
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
%5 = and i1 %4, %3
%6 = zext i1 %5 to i32
+ %7 = zext i1 %3 to i32
+ %8 = zext i1 %4 to i32
+ %9 = add i32 %6, %7
+ %10 = add i32 %9, %8
+ ret i32 %10
+}
+
+
+; (and (icmp x, 0, ne), (icmp y, 0, ne)) -> (czero.eqz (icmp x, 0, ne), y)
+define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
+; RV32ZICOND-LABEL: icmp_and_select:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: sgtz a5, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
+; RV32ZICOND-NEXT: czero.nez a2, a2, a3
+; RV32ZICOND-NEXT: or a2, a2, a5
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a4, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_select:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: sgtz a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp sgt i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = select i1 %5, i32 %z, i32 0
ret i32 %6
}
@@ -32,21 +142,17 @@ define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: or a4, a4, a5
-; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a1, a0
-; RV32ZICOND-NEXT: snez a1, a4
-; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_and:
; RV64ZICOND: # %bb.0:
-; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a1, a0
-; RV64ZICOND-NEXT: snez a1, a2
-; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
; RV64ZICOND-NEXT: ret
%4 = icmp ne i64 %y, 0
%5 = icmp ne i64 %x, 0