[llvm] [RISCV] Lower SELECT's with one constant more efficiently using Zicond (PR #143581)
Ryan Buchner via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 00:48:12 PDT 2025
https://github.com/bababuck updated https://github.com/llvm/llvm-project/pull/143581
>From 7d64e5c312f029748d8df2239e92981c41084607 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Sun, 25 May 2025 11:28:59 -0700
Subject: [PATCH 1/2] [RISCV] Add new tests for RISCV zicond extension
---
llvm/test/CodeGen/RISCV/zicond-opts.ll | 181 +++++++++++++++++++++++++
1 file changed, 181 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/zicond-opts.ll
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
new file mode 100644
index 0000000000000..a3e8a5709c41a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV32ZICOND
+; RUN: llc -mtriple=riscv64 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV64ZICOND
+
+; (and (icmp x, 0, ne), (icmp y, 0, ne)) -> (czero.eqz (icmp x, 0, ne), y)
+define i32 @icmp_and(i64 %0, i64 %1) {
+; RV32ZICOND-LABEL: icmp_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %1, 0
+ %4 = icmp ne i64 %0, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; (and (and (icmp x, 0, ne), (icmp y, 0, ne)), (icmp z, 0, ne)) -> (czero.eqz (czero.eqz (icmp x, 0, ne), y), z)
+define i32 @icmp_and_and(i64 %0, i64 %1, i64 %2) {
+; RV32ZICOND-LABEL: icmp_and_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: snez a1, a4
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: snez a1, a2
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %4 = icmp ne i64 %1, 0
+ %5 = icmp ne i64 %0, 0
+ %6 = and i1 %4, %5
+ %7 = icmp ne i64 %2, 0
+ %8 = and i1 %7, %6
+ %9 = zext i1 %8 to i32
+ ret i32 %9
+}
+
+; (select c, u, (rotl u, t)) -> (rotl u, (czero_nez t, c))
+define i64 @rotate_l_eqz(i64 %0, i64 %1, i64 %2, i64 %3) {
+; RV32ZICOND-LABEL: rotate_l_eqz:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a0, a6, a7
+; RV32ZICOND-NEXT: bexti a1, a4, 5
+; RV32ZICOND-NEXT: not a5, a4
+; RV32ZICOND-NEXT: czero.nez a6, a3, a1
+; RV32ZICOND-NEXT: czero.eqz a7, a2, a1
+; RV32ZICOND-NEXT: czero.nez t0, a2, a1
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
+; RV32ZICOND-NEXT: czero.nez a2, a2, a0
+; RV32ZICOND-NEXT: czero.nez a3, a3, a0
+; RV32ZICOND-NEXT: or a6, a7, a6
+; RV32ZICOND-NEXT: or a1, a1, t0
+; RV32ZICOND-NEXT: sll a7, a6, a4
+; RV32ZICOND-NEXT: srli t0, a1, 1
+; RV32ZICOND-NEXT: sll a1, a1, a4
+; RV32ZICOND-NEXT: srli a4, a6, 1
+; RV32ZICOND-NEXT: srl a6, t0, a5
+; RV32ZICOND-NEXT: srl a4, a4, a5
+; RV32ZICOND-NEXT: or a5, a7, a6
+; RV32ZICOND-NEXT: or a1, a1, a4
+; RV32ZICOND-NEXT: czero.eqz a1, a1, a0
+; RV32ZICOND-NEXT: czero.eqz a4, a5, a0
+; RV32ZICOND-NEXT: or a0, a2, a1
+; RV32ZICOND-NEXT: or a1, a3, a4
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: rotate_l_eqz:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: rol a0, a1, a2
+; RV64ZICOND-NEXT: czero.nez a1, a1, a3
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a3
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %5 = icmp eq i64 %3, 0
+ %6 = call i64 @llvm.fshl.i64(i64 %1, i64 %1, i64 %2)
+ %7 = select i1 %5, i64 %1, i64 %6
+ ret i64 %7
+}
+
+; (select cond, const, t) -> (add (czero_nez t - const, cond), const)
+define i64 @select_imm_reg(i64 %0, i64 %1) {
+; RV32ZICOND-LABEL: select_imm_reg:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: xori a0, a0, 2
+; RV32ZICOND-NEXT: or a1, a0, a1
+; RV32ZICOND-NEXT: li a0, 3
+; RV32ZICOND-NEXT: czero.eqz a2, a2, a1
+; RV32ZICOND-NEXT: czero.nez a0, a0, a1
+; RV32ZICOND-NEXT: or a0, a0, a2
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_imm_reg:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: addi a0, a0, -2
+; RV64ZICOND-NEXT: li a2, 3
+; RV64ZICOND-NEXT: czero.eqz a1, a1, a0
+; RV64ZICOND-NEXT: czero.nez a0, a2, a0
+; RV64ZICOND-NEXT: or a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp eq i64 %0, 2
+ %4 = select i1 %3, i64 3, i64 %1
+ ret i64 %4
+}
+
+; (select cond, -2048, t) -> (xor (czero_nez (xor t, -2048), cond), -2048)
+define i64 @select_imm_reg_2048(i64 %0, i64 %1) {
+; RV32ZICOND-LABEL: select_imm_reg_2048:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: xori a0, a0, 2
+; RV32ZICOND-NEXT: li a4, -2048
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: seqz a1, a0
+; RV32ZICOND-NEXT: czero.eqz a2, a2, a0
+; RV32ZICOND-NEXT: czero.nez a0, a4, a0
+; RV32ZICOND-NEXT: or a0, a0, a2
+; RV32ZICOND-NEXT: neg a1, a1
+; RV32ZICOND-NEXT: or a1, a1, a3
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_imm_reg_2048:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: addi a0, a0, -2
+; RV64ZICOND-NEXT: li a2, -2048
+; RV64ZICOND-NEXT: czero.eqz a1, a1, a0
+; RV64ZICOND-NEXT: czero.nez a0, a2, a0
+; RV64ZICOND-NEXT: or a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp eq i64 %0, 2
+ %4 = select i1 %3, i64 -2048, i64 %1
+ ret i64 %4
+}
+
+; (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
+define i64 @test_inv_and_eqz(i64 %1, i64 %2, i64 %3) {
+; RV32ZICOND-LABEL: test_inv_and_eqz:
+; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: snez a4, a4
+; RV32ZICOND-NEXT: addi a4, a4, -1
+; RV32ZICOND-NEXT: orn a3, a4, a3
+; RV32ZICOND-NEXT: orn a2, a4, a2
+; RV32ZICOND-NEXT: and a0, a2, a0
+; RV32ZICOND-NEXT: and a1, a3, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: test_inv_and_eqz:
+; RV64ZICOND: # %bb.0: # %entry
+; RV64ZICOND-NEXT: czero.nez a2, a0, a2
+; RV64ZICOND-NEXT: andn a0, a0, a1
+; RV64ZICOND-NEXT: or a0, a0, a2
+; RV64ZICOND-NEXT: ret
+entry:
+ %4 = icmp ne i64 %3, 0
+ %5 = xor i64 %2, -1
+ %6 = select i1 %4, i64 %5, i64 -1
+ %7 = and i64 %6, %1
+ ret i64 %7
+}
>From c2e74870cfce6bd7382ad9f2b4137dd9bbf7774b Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 10 Jun 2025 12:59:52 -0700
Subject: [PATCH 2/2] [RISCV] Efficiently lower select %cond, %constant,
%variable using zicond extension
Provides the following lowerings, which reduce the instruction count by 1 for rv64:
(select c, c1, t) -> (add (czero_nez t - c1, c), c1)
(select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
For the special case of c1 == -2048
(select c, -2048, t) -> (xor (czero_nez (xor t, -2048), c), -2048)
(select c, t, -2048) -> (xor (czero_eqz (xor t, -2048), c), -2048)
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 26 +++++++++++
.../CodeGen/RISCV/short-forward-branch-opt.ll | 46 ++++++++++++-------
llvm/test/CodeGen/RISCV/zicond-opts.ll | 32 ++++++-------
3 files changed, 70 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab8b36df44d3f..1905564d6073e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9088,6 +9088,32 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
}
+ // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
+ // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
+ if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
+ bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
+ SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
+ SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
+ int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
+ // Fall back to XORI if Const == -0x800
+ if (RawConstVal == -0x800) {
+ SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
+ SDValue CMOV =
+ DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
+ DL, VT, XorOp, CondV);
+ return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
+ }
+ // Efficient only if the constant and its negation fit into `ADDI`
+ // Prefer Add/Sub over Xor since they can be compressed for small immediates
+ if (isInt<12>(RawConstVal)) {
+ SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
+ SDValue CMOV =
+ DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
+ DL, VT, SubOp, CondV);
+ return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
+ }
+ }
+
// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
// Unless we have the short forward branch optimization.
if (!Subtarget.hasConditionalMoveFusion())
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index b7b88584f3bdb..13c43a3875a08 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -173,14 +173,21 @@ define signext i32 @test6(i32 signext %x, i32 signext %z) {
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
;
-; SFB-LABEL: test6:
-; SFB: # %bb.0:
-; SFB-NEXT: li a2, -1
-; SFB-NEXT: beqz a1, .LBB5_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: .LBB5_2:
-; SFB-NEXT: ret
+; NOZICOND-LABEL: test6:
+; NOZICOND: # %bb.0:
+; NOZICOND-NEXT: li a2, -1
+; NOZICOND-NEXT: beqz a1, .LBB5_2
+; NOZICOND-NEXT: # %bb.1:
+; NOZICOND-NEXT: mv a0, a2
+; NOZICOND-NEXT: .LBB5_2:
+; NOZICOND-NEXT: ret
+;
+; ZICOND-LABEL: test6:
+; ZICOND: # %bb.0:
+; ZICOND-NEXT: addi a0, a0, 1
+; ZICOND-NEXT: czero.nez a0, a0, a1
+; ZICOND-NEXT: addi a0, a0, -1
+; ZICOND-NEXT: ret
%c = icmp eq i32 %z, 0
%b = select i1 %c, i32 %x, i32 -1
ret i32 %b
@@ -195,14 +202,21 @@ define signext i32 @test7(i32 signext %x, i32 signext %z) {
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
;
-; SFB-LABEL: test7:
-; SFB: # %bb.0:
-; SFB-NEXT: li a2, -1
-; SFB-NEXT: bnez a1, .LBB6_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: .LBB6_2:
-; SFB-NEXT: ret
+; NOZICOND-LABEL: test7:
+; NOZICOND: # %bb.0:
+; NOZICOND-NEXT: li a2, -1
+; NOZICOND-NEXT: bnez a1, .LBB6_2
+; NOZICOND-NEXT: # %bb.1:
+; NOZICOND-NEXT: mv a0, a2
+; NOZICOND-NEXT: .LBB6_2:
+; NOZICOND-NEXT: ret
+;
+; ZICOND-LABEL: test7:
+; ZICOND: # %bb.0:
+; ZICOND-NEXT: addi a0, a0, 1
+; ZICOND-NEXT: czero.eqz a0, a0, a1
+; ZICOND-NEXT: addi a0, a0, -1
+; ZICOND-NEXT: ret
%c = icmp eq i32 %z, 0
%b = select i1 %c, i32 -1, i32 %x
ret i32 %b
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index a3e8a5709c41a..4cf77c78771a1 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -104,21 +104,19 @@ define i64 @select_imm_reg(i64 %0, i64 %1) {
; RV32ZICOND-LABEL: select_imm_reg:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: xori a0, a0, 2
-; RV32ZICOND-NEXT: or a1, a0, a1
-; RV32ZICOND-NEXT: li a0, 3
-; RV32ZICOND-NEXT: czero.eqz a2, a2, a1
-; RV32ZICOND-NEXT: czero.nez a0, a0, a1
-; RV32ZICOND-NEXT: or a0, a0, a2
-; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
+; RV32ZICOND-NEXT: addi a2, a2, -3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: addi a0, a0, 3
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: select_imm_reg:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: addi a0, a0, -2
-; RV64ZICOND-NEXT: li a2, 3
-; RV64ZICOND-NEXT: czero.eqz a1, a1, a0
-; RV64ZICOND-NEXT: czero.nez a0, a2, a0
-; RV64ZICOND-NEXT: or a0, a0, a1
+; RV64ZICOND-NEXT: addi a1, a1, -3
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: addi a0, a0, 3
; RV64ZICOND-NEXT: ret
%3 = icmp eq i64 %0, 2
%4 = select i1 %3, i64 3, i64 %1
@@ -130,23 +128,21 @@ define i64 @select_imm_reg_2048(i64 %0, i64 %1) {
; RV32ZICOND-LABEL: select_imm_reg_2048:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: xori a0, a0, 2
-; RV32ZICOND-NEXT: li a4, -2048
+; RV32ZICOND-NEXT: xori a2, a2, -2048
; RV32ZICOND-NEXT: or a0, a0, a1
; RV32ZICOND-NEXT: seqz a1, a0
-; RV32ZICOND-NEXT: czero.eqz a2, a2, a0
-; RV32ZICOND-NEXT: czero.nez a0, a4, a0
-; RV32ZICOND-NEXT: or a0, a0, a2
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
; RV32ZICOND-NEXT: neg a1, a1
; RV32ZICOND-NEXT: or a1, a1, a3
+; RV32ZICOND-NEXT: xori a0, a0, -2048
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: select_imm_reg_2048:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: addi a0, a0, -2
-; RV64ZICOND-NEXT: li a2, -2048
-; RV64ZICOND-NEXT: czero.eqz a1, a1, a0
-; RV64ZICOND-NEXT: czero.nez a0, a2, a0
-; RV64ZICOND-NEXT: or a0, a0, a1
+; RV64ZICOND-NEXT: xori a1, a1, -2048
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: xori a0, a0, -2048
; RV64ZICOND-NEXT: ret
%3 = icmp eq i64 %0, 2
%4 = select i1 %3, i64 -2048, i64 %1
More information about the llvm-commits
mailing list