[llvm] [RISCV] Lower SELECTs with one constant more efficiently using Zicond (PR #143581)
Ryan Buchner via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 13:49:44 PDT 2025
https://github.com/bababuck updated https://github.com/llvm/llvm-project/pull/143581
From 98eed72942b21a62661d14326d3702a0f7a3bf3c Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Sun, 25 May 2025 11:28:59 -0700
Subject: [PATCH 1/2] [RISCV] Add new tests for RISCV zicond extension
---
llvm/test/CodeGen/RISCV/zicond-opts.ll | 153 +++++++++++++++++++++++++
1 file changed, 153 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/zicond-opts.ll
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
new file mode 100644
index 0000000000000..c1db993cbb30e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV32ZICOND
+; RUN: llc -mtriple=riscv64 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV64ZICOND
+
+; (and (icmp x, 0, ne), (icmp y, 0, ne)) -> (czero.eqz (icmp x, 0, ne), y)
+define i32 @icmp_and(i64 %0, i64 %1) {
+; RV32ZICOND-LABEL: icmp_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %1, 0
+ %4 = icmp ne i64 %0, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; (and (and (icmp x, 0, ne), (icmp y, 0, ne)), (icmp z, 0, ne)) -> (czero.eqz (czero.eqz (icmp x, 0, ne), y), z)
+define i32 @icmp_and_and(i64 %0, i64 %1, i64 %2) {
+; RV32ZICOND-LABEL: icmp_and_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: snez a1, a4
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: snez a1, a2
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %4 = icmp ne i64 %1, 0
+ %5 = icmp ne i64 %0, 0
+ %6 = and i1 %4, %5
+ %7 = icmp ne i64 %2, 0
+ %8 = and i1 %7, %6
+ %9 = zext i1 %8 to i32
+ ret i32 %9
+}
+
+; (select c, u, rotl(u, t)) -> (rotl u, (czero_nez t, c))
+define i64 @rotate_l_eqz(i64 %0, i64 %1, i64 %2, i64 %3) {
+; RV32ZICOND-LABEL: rotate_l_eqz:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a0, a6, a7
+; RV32ZICOND-NEXT: bexti a1, a4, 5
+; RV32ZICOND-NEXT: not a5, a4
+; RV32ZICOND-NEXT: czero.nez a6, a3, a1
+; RV32ZICOND-NEXT: czero.eqz a7, a2, a1
+; RV32ZICOND-NEXT: czero.nez t0, a2, a1
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
+; RV32ZICOND-NEXT: czero.nez a2, a2, a0
+; RV32ZICOND-NEXT: czero.nez a3, a3, a0
+; RV32ZICOND-NEXT: or a6, a7, a6
+; RV32ZICOND-NEXT: or a1, a1, t0
+; RV32ZICOND-NEXT: sll a7, a6, a4
+; RV32ZICOND-NEXT: srli t0, a1, 1
+; RV32ZICOND-NEXT: sll a1, a1, a4
+; RV32ZICOND-NEXT: srli a4, a6, 1
+; RV32ZICOND-NEXT: srl a6, t0, a5
+; RV32ZICOND-NEXT: srl a4, a4, a5
+; RV32ZICOND-NEXT: or a5, a7, a6
+; RV32ZICOND-NEXT: or a1, a1, a4
+; RV32ZICOND-NEXT: czero.eqz a1, a1, a0
+; RV32ZICOND-NEXT: czero.eqz a4, a5, a0
+; RV32ZICOND-NEXT: or a0, a2, a1
+; RV32ZICOND-NEXT: or a1, a3, a4
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: rotate_l_eqz:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: rol a0, a1, a2
+; RV64ZICOND-NEXT: czero.nez a1, a1, a3
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a3
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %5 = icmp eq i64 %3, 0
+ %6 = call i64 @llvm.fshl.i64(i64 %1, i64 %1, i64 %2)
+ %7 = select i1 %5, i64 %1, i64 %6
+ ret i64 %7
+}
+
+; (select cond, const, t) -> (add (czero_nez t - const, cond), const)
+define i64 @select_imm_reg(i64 %0, i64 %1) {
+; RV32ZICOND-LABEL: select_imm_reg:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: xori a0, a0, 2
+; RV32ZICOND-NEXT: or a1, a0, a1
+; RV32ZICOND-NEXT: li a0, 3
+; RV32ZICOND-NEXT: czero.eqz a2, a2, a1
+; RV32ZICOND-NEXT: czero.nez a0, a0, a1
+; RV32ZICOND-NEXT: or a0, a0, a2
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_imm_reg:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: addi a0, a0, -2
+; RV64ZICOND-NEXT: li a2, 3
+; RV64ZICOND-NEXT: czero.eqz a1, a1, a0
+; RV64ZICOND-NEXT: czero.nez a0, a2, a0
+; RV64ZICOND-NEXT: or a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp eq i64 %0, 2
+ %4 = select i1 %3, i64 3, i64 %1
+ ret i64 %4
+}
+
+; (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
+define i64 @test_inv_and_eqz(i64 %1, i64 %2, i64 %3) {
+; RV32ZICOND-LABEL: test_inv_and_eqz:
+; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: snez a4, a4
+; RV32ZICOND-NEXT: addi a4, a4, -1
+; RV32ZICOND-NEXT: orn a3, a4, a3
+; RV32ZICOND-NEXT: orn a2, a4, a2
+; RV32ZICOND-NEXT: and a0, a2, a0
+; RV32ZICOND-NEXT: and a1, a3, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: test_inv_and_eqz:
+; RV64ZICOND: # %bb.0: # %entry
+; RV64ZICOND-NEXT: czero.nez a2, a0, a2
+; RV64ZICOND-NEXT: andn a0, a0, a1
+; RV64ZICOND-NEXT: or a0, a0, a2
+; RV64ZICOND-NEXT: ret
+entry:
+ %4 = icmp ne i64 %3, 0
+ %5 = xor i64 %2, -1
+ %6 = select i1 %4, i64 %5, i64 -1
+ %7 = and i64 %6, %1
+ ret i64 %7
+}
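
For reference, the pattern comments in the tests above are written in terms of the Zicond conditional-zero primitives, which select between a register value and zero based on a condition register. Below is a minimal C++ model of their semantics, including the generic select lowering they compose into (an illustration for readers, not part of the patch; the function names are made up for the sketch):

#include <cassert>
#include <cstdint>

// czero.eqz rd, rs1, rs2: rd = (rs2 == 0) ? 0 : rs1
uint64_t czero_eqz(uint64_t rs1, uint64_t rs2) { return rs2 != 0 ? rs1 : 0; }

// czero.nez rd, rs1, rs2: rd = (rs2 != 0) ? 0 : rs1
uint64_t czero_nez(uint64_t rs1, uint64_t rs2) { return rs2 != 0 ? 0 : rs1; }

int main() {
  // Generic lowering: (select c, t, f) -> (or (czero.eqz t, c), (czero.nez f, c))
  uint64_t c = 1, t = 10, f = 20;
  assert((czero_eqz(t, c) | czero_nez(f, c)) == t); // c != 0 -> t
  c = 0;
  assert((czero_eqz(t, c) | czero_nez(f, c)) == f); // c == 0 -> f
}
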
From af9bf831700dc22ede8139b1b57cb3325ad5cb4c Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 10 Jun 2025 12:59:52 -0700
Subject: [PATCH 2/2] [RISCV] Efficiently lower select %cond, %constant,
%variable using zicond extension
Provides the following lowerings, each of which reduces the instruction count by one on rv64:
(select c, c1, t) -> (add (czero_nez t - c1, c), c1)
(select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
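
Concretely: when the condition picks the constant, the czero kills the (t - c1) term and the final add restores c1; when it picks the variable, the subtract and add cancel (two's-complement wraparound makes (t - c1) + c1 == t even if the subtraction overflows). A worked C++ sketch of the two identities (illustration only; the helper names are invented for this sketch):

#include <cassert>
#include <cstdint>

// (select c, c1, t) -> (add (czero.nez (t - c1), c), c1)
uint64_t lowerSelectConstTrue(uint64_t c, int64_t c1, uint64_t t) {
  uint64_t sub = t - (uint64_t)c1;      // addi t, -c1 when |c1| fits
  uint64_t masked = (c != 0) ? 0 : sub; // czero.nez
  return masked + (uint64_t)c1;         // addi ..., c1
}

// (select c, t, c1) -> (add (czero.eqz (t - c1), c), c1)
uint64_t lowerSelectConstFalse(uint64_t c, uint64_t t, int64_t c1) {
  uint64_t sub = t - (uint64_t)c1;
  uint64_t masked = (c == 0) ? 0 : sub; // czero.eqz
  return masked + (uint64_t)c1;
}

int main() {
  assert(lowerSelectConstTrue(1, 3, 42) == 3);   // c true  -> constant
  assert(lowerSelectConstTrue(0, 3, 42) == 42);  // c false -> variable
  assert(lowerSelectConstFalse(1, 42, 3) == 42); // c true  -> variable
  assert(lowerSelectConstFalse(0, 42, 3) == 3);  // c false -> constant
}
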
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 +++++++
.../CodeGen/RISCV/short-forward-branch-opt.ll | 46 ++++++++++++-------
llvm/test/CodeGen/RISCV/zicond-opts.ll | 18 ++++----
3 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab8b36df44d3f..99e79034c9f89 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9088,6 +9088,22 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
}
+ // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
+ // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
+ if (isa<ConstantSDNode>(TrueV) ^ isa<ConstantSDNode>(FalseV)) {
+ bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
+ SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
+ // Efficient only if the constant and its negation fit into `ADDI`
+ if (std::abs(cast<ConstantSDNode>(ConstVal)->getSExtValue()) < 0x800) {
+ SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
+ SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
+ SDValue CMOV =
+ DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
+ DL, VT, SubOp, CondV);
+ return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
+ }
+ }
+
// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
// Unless we have the short forward branch optimization.
if (!Subtarget.hasConditionalMoveFusion())
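
A note on the `< 0x800` guard above: ADDI takes a signed 12-bit immediate, i.e. [-2048, 2047], and the lowering needs both -c1 (for the subtract, folded to an addi) and +c1 (for the final add) to be encodable, so |c1| <= 2047 is exactly the right condition. A quick self-check of that equivalence (an illustrative sketch, not LLVM code; fitsAddiImm is an invented helper):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// ADDI's immediate is a signed 12-bit field: [-2048, 2047].
bool fitsAddiImm(int64_t v) { return v >= -2048 && v <= 2047; }

int main() {
  for (int64_t c1 = -4096; c1 <= 4096; ++c1) {
    bool guard = std::abs(c1) < 0x800; // the patch's profitability check
    // Both the negated constant and the constant itself must be encodable.
    assert(guard == (fitsAddiImm(c1) && fitsAddiImm(-c1)));
  }
}
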
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index b7b88584f3bdb..13c43a3875a08 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -173,14 +173,21 @@ define signext i32 @test6(i32 signext %x, i32 signext %z) {
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
;
-; SFB-LABEL: test6:
-; SFB: # %bb.0:
-; SFB-NEXT: li a2, -1
-; SFB-NEXT: beqz a1, .LBB5_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: .LBB5_2:
-; SFB-NEXT: ret
+; NOZICOND-LABEL: test6:
+; NOZICOND: # %bb.0:
+; NOZICOND-NEXT: li a2, -1
+; NOZICOND-NEXT: beqz a1, .LBB5_2
+; NOZICOND-NEXT: # %bb.1:
+; NOZICOND-NEXT: mv a0, a2
+; NOZICOND-NEXT: .LBB5_2:
+; NOZICOND-NEXT: ret
+;
+; ZICOND-LABEL: test6:
+; ZICOND: # %bb.0:
+; ZICOND-NEXT: addi a0, a0, 1
+; ZICOND-NEXT: czero.nez a0, a0, a1
+; ZICOND-NEXT: addi a0, a0, -1
+; ZICOND-NEXT: ret
%c = icmp eq i32 %z, 0
%b = select i1 %c, i32 %x, i32 -1
ret i32 %b
@@ -195,14 +202,21 @@ define signext i32 @test7(i32 signext %x, i32 signext %z) {
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
;
-; SFB-LABEL: test7:
-; SFB: # %bb.0:
-; SFB-NEXT: li a2, -1
-; SFB-NEXT: bnez a1, .LBB6_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: .LBB6_2:
-; SFB-NEXT: ret
+; NOZICOND-LABEL: test7:
+; NOZICOND: # %bb.0:
+; NOZICOND-NEXT: li a2, -1
+; NOZICOND-NEXT: bnez a1, .LBB6_2
+; NOZICOND-NEXT: # %bb.1:
+; NOZICOND-NEXT: mv a0, a2
+; NOZICOND-NEXT: .LBB6_2:
+; NOZICOND-NEXT: ret
+;
+; ZICOND-LABEL: test7:
+; ZICOND: # %bb.0:
+; ZICOND-NEXT: addi a0, a0, 1
+; ZICOND-NEXT: czero.eqz a0, a0, a1
+; ZICOND-NEXT: addi a0, a0, -1
+; ZICOND-NEXT: ret
%c = icmp eq i32 %z, 0
%b = select i1 %c, i32 -1, i32 %x
ret i32 %b
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index c1db993cbb30e..ea80f4a0f3530 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -104,21 +104,19 @@ define i64 @select_imm_reg(i64 %0, i64 %1) {
; RV32ZICOND-LABEL: select_imm_reg:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: xori a0, a0, 2
-; RV32ZICOND-NEXT: or a1, a0, a1
-; RV32ZICOND-NEXT: li a0, 3
-; RV32ZICOND-NEXT: czero.eqz a2, a2, a1
-; RV32ZICOND-NEXT: czero.nez a0, a0, a1
-; RV32ZICOND-NEXT: or a0, a0, a2
-; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
+; RV32ZICOND-NEXT: addi a2, a2, -3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: addi a0, a0, 3
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: select_imm_reg:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: addi a0, a0, -2
-; RV64ZICOND-NEXT: li a2, 3
-; RV64ZICOND-NEXT: czero.eqz a1, a1, a0
-; RV64ZICOND-NEXT: czero.nez a0, a2, a0
-; RV64ZICOND-NEXT: or a0, a0, a1
+; RV64ZICOND-NEXT: addi a1, a1, -3
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: addi a0, a0, 3
; RV64ZICOND-NEXT: ret
%3 = icmp eq i64 %0, 2
%4 = select i1 %3, i64 3, i64 %1