[llvm] [RISCV] Generate bexti for (select(setcc eq (and x, c))) where c is power of 2. (PR #73649)
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 06:04:30 PST 2023
https://github.com/yetingk created https://github.com/llvm/llvm-project/pull/73649
Currently, LLVM can transform (setcc ne (and x, c)) to (bexti x, log2(c)) where c is a power of 2.
This patch transforms (select (setcc ne (and x, c)), T, F) into (select (setcc eq (and x, c)), F, T).
This is beneficial when c does not fit in a 12-bit immediate.
>From 41619ca53093b66f6f36ae7d55ea750d2eb6f582 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 28 Nov 2023 05:26:28 -0800
Subject: [PATCH 1/2] [RISCV] Precommit test.
---
llvm/test/CodeGen/RISCV/condops.ll | 476 +++++++++++++++++------------
1 file changed, 286 insertions(+), 190 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll
index b9912c6ccfb98cd..08b120d1251238d 100644
--- a/llvm/test/CodeGen/RISCV/condops.ll
+++ b/llvm/test/CodeGen/RISCV/condops.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f < %s | FileCheck %s -check-prefix=RV32I
-; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f < %s | FileCheck %s -check-prefix=RV64I
-; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+xventanacondops < %s | FileCheck %s -check-prefix=RV64XVENTANACONDOPS
-; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+xtheadcondmov < %s | FileCheck %s -check-prefix=RV64XTHEADCONDMOV
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f,+experimental-zicond < %s | FileCheck %s -check-prefix=RV32ZICOND
-; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+experimental-zicond < %s | FileCheck %s -check-prefix=RV64ZICOND
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f,+zbs < %s | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs < %s | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+xventanacondops < %s | FileCheck %s -check-prefix=RV64XVENTANACONDOPS
+; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+xtheadcondmov < %s | FileCheck %s -check-prefix=RV64XTHEADCONDMOV
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32f -mattr=+f,+zbs,+experimental-zicond < %s | FileCheck %s -check-prefix=RV32ZICOND
+; RUN: llc -mtriple=riscv64 -target-abi=lp64f -mattr=+f,+zbs,+experimental-zicond < %s | FileCheck %s -check-prefix=RV64ZICOND
define i64 @zero1(i64 %rs1, i1 zeroext %rc) {
; RV32I-LABEL: zero1:
@@ -82,6 +82,107 @@ define i64 @zero2(i64 %rs1, i1 zeroext %rc) {
ret i64 %sel
}
+define i64 @zero_singlebit1(i64 %rs1, i64 %rs2) {
+; RV32I-LABEL: zero_singlebit1:
+; RV32I: # %bb.0:
+; RV32I-NEXT: bexti a2, a2, 12
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: zero_singlebit1:
+; RV64I: # %bb.0:
+; RV64I-NEXT: bexti a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64XVENTANACONDOPS-LABEL: zero_singlebit1:
+; RV64XVENTANACONDOPS: # %bb.0:
+; RV64XVENTANACONDOPS-NEXT: lui a2, 1
+; RV64XVENTANACONDOPS-NEXT: and a1, a1, a2
+; RV64XVENTANACONDOPS-NEXT: vt.maskcn a0, a0, a1
+; RV64XVENTANACONDOPS-NEXT: ret
+;
+; RV64XTHEADCONDMOV-LABEL: zero_singlebit1:
+; RV64XTHEADCONDMOV: # %bb.0:
+; RV64XTHEADCONDMOV-NEXT: lui a2, 1
+; RV64XTHEADCONDMOV-NEXT: and a1, a1, a2
+; RV64XTHEADCONDMOV-NEXT: th.mvnez a0, zero, a1
+; RV64XTHEADCONDMOV-NEXT: ret
+;
+; RV32ZICOND-LABEL: zero_singlebit1:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: lui a3, 1
+; RV32ZICOND-NEXT: and a2, a2, a3
+; RV32ZICOND-NEXT: czero.nez a0, a0, a2
+; RV32ZICOND-NEXT: czero.nez a1, a1, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: zero_singlebit1:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: lui a2, 1
+; RV64ZICOND-NEXT: and a1, a1, a2
+; RV64ZICOND-NEXT: czero.nez a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %and = and i64 %rs2, 4096
+ %rc = icmp eq i64 %and, 0
+ %sel = select i1 %rc, i64 %rs1, i64 0
+ ret i64 %sel
+}
+
+; TODO: Optimize Zicond case.
+define i64 @zero_singlebit2(i64 %rs1, i64 %rs2) {
+; RV32I-LABEL: zero_singlebit2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a2, 19
+; RV32I-NEXT: srai a2, a2, 31
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: zero_singlebit2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 51
+; RV64I-NEXT: srai a1, a1, 63
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64XVENTANACONDOPS-LABEL: zero_singlebit2:
+; RV64XVENTANACONDOPS: # %bb.0:
+; RV64XVENTANACONDOPS-NEXT: slli a1, a1, 51
+; RV64XVENTANACONDOPS-NEXT: srai a1, a1, 63
+; RV64XVENTANACONDOPS-NEXT: and a0, a1, a0
+; RV64XVENTANACONDOPS-NEXT: ret
+;
+; RV64XTHEADCONDMOV-LABEL: zero_singlebit2:
+; RV64XTHEADCONDMOV: # %bb.0:
+; RV64XTHEADCONDMOV-NEXT: slli a1, a1, 51
+; RV64XTHEADCONDMOV-NEXT: srai a1, a1, 63
+; RV64XTHEADCONDMOV-NEXT: and a0, a1, a0
+; RV64XTHEADCONDMOV-NEXT: ret
+;
+; RV32ZICOND-LABEL: zero_singlebit2:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: slli a2, a2, 19
+; RV32ZICOND-NEXT: srai a2, a2, 31
+; RV32ZICOND-NEXT: and a0, a2, a0
+; RV32ZICOND-NEXT: and a1, a2, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: zero_singlebit2:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: slli a1, a1, 51
+; RV64ZICOND-NEXT: srai a1, a1, 63
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %and = and i64 %rs2, 4096
+ %rc = icmp eq i64 %and, 0
+ %sel = select i1 %rc, i64 0, i64 %rs1
+ ret i64 %sel
+}
+
define i64 @add1(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: add1:
; RV32I: # %bb.0:
@@ -779,21 +880,21 @@ define i64 @xor4(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
define i64 @and1(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: and1:
; RV32I: # %bb.0:
-; RV32I-NEXT: beqz a0, .LBB16_2
+; RV32I-NEXT: beqz a0, .LBB18_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: and a2, a2, a4
; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: .LBB16_2:
+; RV32I-NEXT: .LBB18_2:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: and1:
; RV64I: # %bb.0:
-; RV64I-NEXT: beqz a0, .LBB16_2
+; RV64I-NEXT: beqz a0, .LBB18_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: .LBB16_2:
+; RV64I-NEXT: .LBB18_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -836,21 +937,21 @@ define i64 @and1(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
define i64 @and2(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: and2:
; RV32I: # %bb.0:
-; RV32I-NEXT: beqz a0, .LBB17_2
+; RV32I-NEXT: beqz a0, .LBB19_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: and a4, a2, a4
; RV32I-NEXT: and a3, a1, a3
-; RV32I-NEXT: .LBB17_2:
+; RV32I-NEXT: .LBB19_2:
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: ret
;
; RV64I-LABEL: and2:
; RV64I: # %bb.0:
-; RV64I-NEXT: beqz a0, .LBB17_2
+; RV64I-NEXT: beqz a0, .LBB19_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: and a2, a1, a2
-; RV64I-NEXT: .LBB17_2:
+; RV64I-NEXT: .LBB19_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -893,21 +994,21 @@ define i64 @and2(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
define i64 @and3(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: and3:
; RV32I: # %bb.0:
-; RV32I-NEXT: bnez a0, .LBB18_2
+; RV32I-NEXT: bnez a0, .LBB20_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: and a2, a2, a4
; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: .LBB18_2:
+; RV32I-NEXT: .LBB20_2:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: and3:
; RV64I: # %bb.0:
-; RV64I-NEXT: bnez a0, .LBB18_2
+; RV64I-NEXT: bnez a0, .LBB20_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: .LBB18_2:
+; RV64I-NEXT: .LBB20_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -950,21 +1051,21 @@ define i64 @and3(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
define i64 @and4(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: and4:
; RV32I: # %bb.0:
-; RV32I-NEXT: bnez a0, .LBB19_2
+; RV32I-NEXT: bnez a0, .LBB21_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: and a4, a2, a4
; RV32I-NEXT: and a3, a1, a3
-; RV32I-NEXT: .LBB19_2:
+; RV32I-NEXT: .LBB21_2:
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: ret
;
; RV64I-LABEL: and4:
; RV64I: # %bb.0:
-; RV64I-NEXT: bnez a0, .LBB19_2
+; RV64I-NEXT: bnez a0, .LBB21_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: and a2, a1, a2
-; RV64I-NEXT: .LBB19_2:
+; RV64I-NEXT: .LBB21_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1007,21 +1108,21 @@ define i64 @and4(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
define i64 @basic(i1 zeroext %rc, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: basic:
; RV32I: # %bb.0:
-; RV32I-NEXT: bnez a0, .LBB20_2
+; RV32I-NEXT: bnez a0, .LBB22_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: .LBB20_2:
+; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: basic:
; RV64I: # %bb.0:
-; RV64I-NEXT: bnez a0, .LBB20_2
+; RV64I-NEXT: bnez a0, .LBB22_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB20_2:
+; RV64I-NEXT: .LBB22_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -1067,19 +1168,19 @@ define i64 @seteq(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a2, a0, a1
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: beqz a2, .LBB21_2
+; RV32I-NEXT: beqz a2, .LBB23_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv a1, a7
-; RV32I-NEXT: .LBB21_2:
+; RV32I-NEXT: .LBB23_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq:
; RV64I: # %bb.0:
-; RV64I-NEXT: beq a0, a1, .LBB21_2
+; RV64I-NEXT: beq a0, a1, .LBB23_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB21_2:
+; RV64I-NEXT: .LBB23_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1131,19 +1232,19 @@ define i64 @setne(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a2, a0, a1
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: bnez a2, .LBB22_2
+; RV32I-NEXT: bnez a2, .LBB24_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a6
; RV32I-NEXT: mv a1, a7
-; RV32I-NEXT: .LBB22_2:
+; RV32I-NEXT: .LBB24_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne:
; RV64I: # %bb.0:
-; RV64I-NEXT: bne a0, a1, .LBB22_2
+; RV64I-NEXT: bne a0, a1, .LBB24_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB22_2:
+; RV64I-NEXT: .LBB24_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1190,28 +1291,28 @@ define i64 @setne(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setgt(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setgt:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB23_2
+; RV32I-NEXT: beq a1, a3, .LBB25_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a0, a3, a1
-; RV32I-NEXT: beqz a0, .LBB23_3
-; RV32I-NEXT: j .LBB23_4
-; RV32I-NEXT: .LBB23_2:
+; RV32I-NEXT: beqz a0, .LBB25_3
+; RV32I-NEXT: j .LBB25_4
+; RV32I-NEXT: .LBB25_2:
; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: bnez a0, .LBB23_4
-; RV32I-NEXT: .LBB23_3:
+; RV32I-NEXT: bnez a0, .LBB25_4
+; RV32I-NEXT: .LBB25_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB23_4:
+; RV32I-NEXT: .LBB25_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setgt:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a1, a0, .LBB23_2
+; RV64I-NEXT: blt a1, a0, .LBB25_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB23_2:
+; RV64I-NEXT: .LBB25_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1261,28 +1362,28 @@ define i64 @setgt(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setge(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setge:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB24_2
+; RV32I-NEXT: beq a1, a3, .LBB26_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB24_3
-; RV32I-NEXT: j .LBB24_4
-; RV32I-NEXT: .LBB24_2:
+; RV32I-NEXT: bnez a0, .LBB26_3
+; RV32I-NEXT: j .LBB26_4
+; RV32I-NEXT: .LBB26_2:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB24_4
-; RV32I-NEXT: .LBB24_3:
+; RV32I-NEXT: beqz a0, .LBB26_4
+; RV32I-NEXT: .LBB26_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB24_4:
+; RV32I-NEXT: .LBB26_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setge:
; RV64I: # %bb.0:
-; RV64I-NEXT: bge a0, a1, .LBB24_2
+; RV64I-NEXT: bge a0, a1, .LBB26_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB24_2:
+; RV64I-NEXT: .LBB26_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1332,28 +1433,28 @@ define i64 @setge(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setlt(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setlt:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB25_2
+; RV32I-NEXT: beq a1, a3, .LBB27_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a0, a1, a3
-; RV32I-NEXT: beqz a0, .LBB25_3
-; RV32I-NEXT: j .LBB25_4
-; RV32I-NEXT: .LBB25_2:
+; RV32I-NEXT: beqz a0, .LBB27_3
+; RV32I-NEXT: j .LBB27_4
+; RV32I-NEXT: .LBB27_2:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: bnez a0, .LBB25_4
-; RV32I-NEXT: .LBB25_3:
+; RV32I-NEXT: bnez a0, .LBB27_4
+; RV32I-NEXT: .LBB27_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB25_4:
+; RV32I-NEXT: .LBB27_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setlt:
; RV64I: # %bb.0:
-; RV64I-NEXT: blt a0, a1, .LBB25_2
+; RV64I-NEXT: blt a0, a1, .LBB27_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB25_2:
+; RV64I-NEXT: .LBB27_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1403,28 +1504,28 @@ define i64 @setlt(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setle(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setle:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB26_2
+; RV32I-NEXT: beq a1, a3, .LBB28_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a0, a3, a1
-; RV32I-NEXT: bnez a0, .LBB26_3
-; RV32I-NEXT: j .LBB26_4
-; RV32I-NEXT: .LBB26_2:
+; RV32I-NEXT: bnez a0, .LBB28_3
+; RV32I-NEXT: j .LBB28_4
+; RV32I-NEXT: .LBB28_2:
; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: beqz a0, .LBB26_4
-; RV32I-NEXT: .LBB26_3:
+; RV32I-NEXT: beqz a0, .LBB28_4
+; RV32I-NEXT: .LBB28_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB26_4:
+; RV32I-NEXT: .LBB28_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setle:
; RV64I: # %bb.0:
-; RV64I-NEXT: bge a1, a0, .LBB26_2
+; RV64I-NEXT: bge a1, a0, .LBB28_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB26_2:
+; RV64I-NEXT: .LBB28_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1474,28 +1575,28 @@ define i64 @setle(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setugt(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setugt:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB27_2
+; RV32I-NEXT: beq a1, a3, .LBB29_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a0, a3, a1
-; RV32I-NEXT: beqz a0, .LBB27_3
-; RV32I-NEXT: j .LBB27_4
-; RV32I-NEXT: .LBB27_2:
+; RV32I-NEXT: beqz a0, .LBB29_3
+; RV32I-NEXT: j .LBB29_4
+; RV32I-NEXT: .LBB29_2:
; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: bnez a0, .LBB27_4
-; RV32I-NEXT: .LBB27_3:
+; RV32I-NEXT: bnez a0, .LBB29_4
+; RV32I-NEXT: .LBB29_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB27_4:
+; RV32I-NEXT: .LBB29_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setugt:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a1, a0, .LBB27_2
+; RV64I-NEXT: bltu a1, a0, .LBB29_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB27_2:
+; RV64I-NEXT: .LBB29_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1545,28 +1646,28 @@ define i64 @setugt(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setuge(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setuge:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB28_2
+; RV32I-NEXT: beq a1, a3, .LBB30_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: bnez a0, .LBB28_3
-; RV32I-NEXT: j .LBB28_4
-; RV32I-NEXT: .LBB28_2:
+; RV32I-NEXT: bnez a0, .LBB30_3
+; RV32I-NEXT: j .LBB30_4
+; RV32I-NEXT: .LBB30_2:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: beqz a0, .LBB28_4
-; RV32I-NEXT: .LBB28_3:
+; RV32I-NEXT: beqz a0, .LBB30_4
+; RV32I-NEXT: .LBB30_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB28_4:
+; RV32I-NEXT: .LBB30_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setuge:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a0, a1, .LBB28_2
+; RV64I-NEXT: bgeu a0, a1, .LBB30_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB28_2:
+; RV64I-NEXT: .LBB30_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1616,28 +1717,28 @@ define i64 @setuge(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setult(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setult:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB29_2
+; RV32I-NEXT: beq a1, a3, .LBB31_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a0, a1, a3
-; RV32I-NEXT: beqz a0, .LBB29_3
-; RV32I-NEXT: j .LBB29_4
-; RV32I-NEXT: .LBB29_2:
+; RV32I-NEXT: beqz a0, .LBB31_3
+; RV32I-NEXT: j .LBB31_4
+; RV32I-NEXT: .LBB31_2:
; RV32I-NEXT: sltu a0, a0, a2
-; RV32I-NEXT: bnez a0, .LBB29_4
-; RV32I-NEXT: .LBB29_3:
+; RV32I-NEXT: bnez a0, .LBB31_4
+; RV32I-NEXT: .LBB31_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB29_4:
+; RV32I-NEXT: .LBB31_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setult:
; RV64I: # %bb.0:
-; RV64I-NEXT: bltu a0, a1, .LBB29_2
+; RV64I-NEXT: bltu a0, a1, .LBB31_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB29_2:
+; RV64I-NEXT: .LBB31_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1687,28 +1788,28 @@ define i64 @setult(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @setule(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setule:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB30_2
+; RV32I-NEXT: beq a1, a3, .LBB32_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a0, a3, a1
-; RV32I-NEXT: bnez a0, .LBB30_3
-; RV32I-NEXT: j .LBB30_4
-; RV32I-NEXT: .LBB30_2:
+; RV32I-NEXT: bnez a0, .LBB32_3
+; RV32I-NEXT: j .LBB32_4
+; RV32I-NEXT: .LBB32_2:
; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: beqz a0, .LBB30_4
-; RV32I-NEXT: .LBB30_3:
+; RV32I-NEXT: beqz a0, .LBB32_4
+; RV32I-NEXT: .LBB32_3:
; RV32I-NEXT: mv a4, a6
; RV32I-NEXT: mv a5, a7
-; RV32I-NEXT: .LBB30_4:
+; RV32I-NEXT: .LBB32_4:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setule:
; RV64I: # %bb.0:
-; RV64I-NEXT: bgeu a1, a0, .LBB30_2
+; RV64I-NEXT: bgeu a1, a0, .LBB32_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a2, a3
-; RV64I-NEXT: .LBB30_2:
+; RV64I-NEXT: .LBB32_2:
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: ret
;
@@ -1761,19 +1862,19 @@ define i64 @seteq_zero(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a6, a0, a1
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB31_2
+; RV32I-NEXT: beqz a6, .LBB33_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: .LBB31_2:
+; RV32I-NEXT: .LBB33_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_zero:
; RV64I: # %bb.0:
-; RV64I-NEXT: beqz a0, .LBB31_2
+; RV64I-NEXT: beqz a0, .LBB33_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB31_2:
+; RV64I-NEXT: .LBB33_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -1818,19 +1919,19 @@ define i64 @setne_zero(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a6, a0, a1
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB32_2
+; RV32I-NEXT: bnez a6, .LBB34_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: .LBB32_2:
+; RV32I-NEXT: .LBB34_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne_zero:
; RV64I: # %bb.0:
-; RV64I-NEXT: bnez a0, .LBB32_2
+; RV64I-NEXT: bnez a0, .LBB34_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB32_2:
+; RV64I-NEXT: .LBB34_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -1876,20 +1977,20 @@ define i64 @seteq_constant(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a6, a0, a1
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB33_2
+; RV32I-NEXT: beqz a6, .LBB35_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: .LBB33_2:
+; RV32I-NEXT: .LBB35_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_constant:
; RV64I: # %bb.0:
; RV64I-NEXT: li a3, 123
-; RV64I-NEXT: beq a0, a3, .LBB33_2
+; RV64I-NEXT: beq a0, a3, .LBB35_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB33_2:
+; RV64I-NEXT: .LBB35_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -1939,20 +2040,20 @@ define i64 @setne_constant(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a6, a0, a1
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB34_2
+; RV32I-NEXT: bnez a6, .LBB36_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: .LBB34_2:
+; RV32I-NEXT: .LBB36_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne_constant:
; RV64I: # %bb.0:
; RV64I-NEXT: li a3, 456
-; RV64I-NEXT: bne a0, a3, .LBB34_2
+; RV64I-NEXT: bne a0, a3, .LBB36_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB34_2:
+; RV64I-NEXT: .LBB36_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -1998,27 +2099,24 @@ define i64 @setne_constant(i64 %a, i64 %rs1, i64 %rs2) {
define i64 @seteq_2048(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: seteq_2048:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a6, 1
-; RV32I-NEXT: slli a6, a6, 11
-; RV32I-NEXT: xor a0, a0, a6
+; RV32I-NEXT: binvi a0, a0, 11
; RV32I-NEXT: or a6, a0, a1
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB35_2
+; RV32I-NEXT: beqz a6, .LBB37_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: .LBB35_2:
+; RV32I-NEXT: .LBB37_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_2048:
; RV64I: # %bb.0:
-; RV64I-NEXT: li a3, 1
-; RV64I-NEXT: slli a3, a3, 11
-; RV64I-NEXT: beq a0, a3, .LBB35_2
+; RV64I-NEXT: bseti a3, zero, 11
+; RV64I-NEXT: beq a0, a3, .LBB37_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB35_2:
+; RV64I-NEXT: .LBB37_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -2039,9 +2137,7 @@ define i64 @seteq_2048(i64 %a, i64 %rs1, i64 %rs2) {
;
; RV32ZICOND-LABEL: seteq_2048:
; RV32ZICOND: # %bb.0:
-; RV32ZICOND-NEXT: li a6, 1
-; RV32ZICOND-NEXT: slli a6, a6, 11
-; RV32ZICOND-NEXT: xor a0, a0, a6
+; RV32ZICOND-NEXT: binvi a0, a0, 11
; RV32ZICOND-NEXT: or a1, a0, a1
; RV32ZICOND-NEXT: czero.eqz a0, a4, a1
; RV32ZICOND-NEXT: czero.nez a2, a2, a1
@@ -2071,20 +2167,20 @@ define i64 @seteq_neg2048(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a6, a0, a1
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB36_2
+; RV32I-NEXT: beqz a6, .LBB38_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: .LBB36_2:
+; RV32I-NEXT: .LBB38_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_neg2048:
; RV64I: # %bb.0:
; RV64I-NEXT: li a3, -2048
-; RV64I-NEXT: beq a0, a3, .LBB36_2
+; RV64I-NEXT: beq a0, a3, .LBB38_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB36_2:
+; RV64I-NEXT: .LBB38_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -2136,20 +2232,20 @@ define i64 @setne_neg2048(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-NEXT: or a6, a0, a1
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB37_2
+; RV32I-NEXT: bnez a6, .LBB39_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
-; RV32I-NEXT: .LBB37_2:
+; RV32I-NEXT: .LBB39_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne_neg2048:
; RV64I: # %bb.0:
; RV64I-NEXT: li a3, -2048
-; RV64I-NEXT: bne a0, a3, .LBB37_2
+; RV64I-NEXT: bne a0, a3, .LBB39_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a2
-; RV64I-NEXT: .LBB37_2:
+; RV64I-NEXT: .LBB39_2:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ret
;
@@ -3000,12 +3096,12 @@ define void @sextw_removal_maskc(i1 %c, i32 signext %arg, i32 signext %arg1) nou
; RV32I-NEXT: slli a0, a0, 31
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: and s1, a0, a1
-; RV32I-NEXT: .LBB54_1: # %bb2
+; RV32I-NEXT: .LBB56_1: # %bb2
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call bar at plt
; RV32I-NEXT: sll s1, s1, s0
-; RV32I-NEXT: bnez a0, .LBB54_1
+; RV32I-NEXT: bnez a0, .LBB56_1
; RV32I-NEXT: # %bb.2: # %bb7
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -3023,12 +3119,12 @@ define void @sextw_removal_maskc(i1 %c, i32 signext %arg, i32 signext %arg1) nou
; RV64I-NEXT: slli a0, a0, 63
; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: and s1, a0, a1
-; RV64I-NEXT: .LBB54_1: # %bb2
+; RV64I-NEXT: .LBB56_1: # %bb2
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call bar at plt
; RV64I-NEXT: sllw s1, s1, s0
-; RV64I-NEXT: bnez a0, .LBB54_1
+; RV64I-NEXT: bnez a0, .LBB56_1
; RV64I-NEXT: # %bb.2: # %bb7
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3045,12 +3141,12 @@ define void @sextw_removal_maskc(i1 %c, i32 signext %arg, i32 signext %arg1) nou
; RV64XVENTANACONDOPS-NEXT: mv s0, a2
; RV64XVENTANACONDOPS-NEXT: andi a0, a0, 1
; RV64XVENTANACONDOPS-NEXT: vt.maskc s1, a1, a0
-; RV64XVENTANACONDOPS-NEXT: .LBB54_1: # %bb2
+; RV64XVENTANACONDOPS-NEXT: .LBB56_1: # %bb2
; RV64XVENTANACONDOPS-NEXT: # =>This Inner Loop Header: Depth=1
; RV64XVENTANACONDOPS-NEXT: mv a0, s1
; RV64XVENTANACONDOPS-NEXT: call bar at plt
; RV64XVENTANACONDOPS-NEXT: sllw s1, s1, s0
-; RV64XVENTANACONDOPS-NEXT: bnez a0, .LBB54_1
+; RV64XVENTANACONDOPS-NEXT: bnez a0, .LBB56_1
; RV64XVENTANACONDOPS-NEXT: # %bb.2: # %bb7
; RV64XVENTANACONDOPS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64XVENTANACONDOPS-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3068,12 +3164,12 @@ define void @sextw_removal_maskc(i1 %c, i32 signext %arg, i32 signext %arg1) nou
; RV64XTHEADCONDMOV-NEXT: mv s1, a1
; RV64XTHEADCONDMOV-NEXT: andi a0, a0, 1
; RV64XTHEADCONDMOV-NEXT: th.mveqz s1, zero, a0
-; RV64XTHEADCONDMOV-NEXT: .LBB54_1: # %bb2
+; RV64XTHEADCONDMOV-NEXT: .LBB56_1: # %bb2
; RV64XTHEADCONDMOV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64XTHEADCONDMOV-NEXT: sext.w a0, s1
; RV64XTHEADCONDMOV-NEXT: call bar at plt
; RV64XTHEADCONDMOV-NEXT: sllw s1, s1, s0
-; RV64XTHEADCONDMOV-NEXT: bnez a0, .LBB54_1
+; RV64XTHEADCONDMOV-NEXT: bnez a0, .LBB56_1
; RV64XTHEADCONDMOV-NEXT: # %bb.2: # %bb7
; RV64XTHEADCONDMOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64XTHEADCONDMOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3090,12 +3186,12 @@ define void @sextw_removal_maskc(i1 %c, i32 signext %arg, i32 signext %arg1) nou
; RV32ZICOND-NEXT: mv s0, a2
; RV32ZICOND-NEXT: andi a0, a0, 1
; RV32ZICOND-NEXT: czero.eqz s1, a1, a0
-; RV32ZICOND-NEXT: .LBB54_1: # %bb2
+; RV32ZICOND-NEXT: .LBB56_1: # %bb2
; RV32ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
; RV32ZICOND-NEXT: mv a0, s1
; RV32ZICOND-NEXT: call bar at plt
; RV32ZICOND-NEXT: sll s1, s1, s0
-; RV32ZICOND-NEXT: bnez a0, .LBB54_1
+; RV32ZICOND-NEXT: bnez a0, .LBB56_1
; RV32ZICOND-NEXT: # %bb.2: # %bb7
; RV32ZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ZICOND-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -3112,12 +3208,12 @@ define void @sextw_removal_maskc(i1 %c, i32 signext %arg, i32 signext %arg1) nou
; RV64ZICOND-NEXT: mv s0, a2
; RV64ZICOND-NEXT: andi a0, a0, 1
; RV64ZICOND-NEXT: czero.eqz s1, a1, a0
-; RV64ZICOND-NEXT: .LBB54_1: # %bb2
+; RV64ZICOND-NEXT: .LBB56_1: # %bb2
; RV64ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
; RV64ZICOND-NEXT: mv a0, s1
; RV64ZICOND-NEXT: call bar at plt
; RV64ZICOND-NEXT: sllw s1, s1, s0
-; RV64ZICOND-NEXT: bnez a0, .LBB54_1
+; RV64ZICOND-NEXT: bnez a0, .LBB56_1
; RV64ZICOND-NEXT: # %bb.2: # %bb7
; RV64ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3151,12 +3247,12 @@ define void @sextw_removal_maskcn(i1 %c, i32 signext %arg, i32 signext %arg1) no
; RV32I-NEXT: andi a0, a0, 1
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and s1, a0, a1
-; RV32I-NEXT: .LBB55_1: # %bb2
+; RV32I-NEXT: .LBB57_1: # %bb2
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call bar at plt
; RV32I-NEXT: sll s1, s1, s0
-; RV32I-NEXT: bnez a0, .LBB55_1
+; RV32I-NEXT: bnez a0, .LBB57_1
; RV32I-NEXT: # %bb.2: # %bb7
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -3174,12 +3270,12 @@ define void @sextw_removal_maskcn(i1 %c, i32 signext %arg, i32 signext %arg1) no
; RV64I-NEXT: andi a0, a0, 1
; RV64I-NEXT: addiw a0, a0, -1
; RV64I-NEXT: and s1, a0, a1
-; RV64I-NEXT: .LBB55_1: # %bb2
+; RV64I-NEXT: .LBB57_1: # %bb2
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call bar at plt
; RV64I-NEXT: sllw s1, s1, s0
-; RV64I-NEXT: bnez a0, .LBB55_1
+; RV64I-NEXT: bnez a0, .LBB57_1
; RV64I-NEXT: # %bb.2: # %bb7
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3196,12 +3292,12 @@ define void @sextw_removal_maskcn(i1 %c, i32 signext %arg, i32 signext %arg1) no
; RV64XVENTANACONDOPS-NEXT: mv s0, a2
; RV64XVENTANACONDOPS-NEXT: andi a0, a0, 1
; RV64XVENTANACONDOPS-NEXT: vt.maskcn s1, a1, a0
-; RV64XVENTANACONDOPS-NEXT: .LBB55_1: # %bb2
+; RV64XVENTANACONDOPS-NEXT: .LBB57_1: # %bb2
; RV64XVENTANACONDOPS-NEXT: # =>This Inner Loop Header: Depth=1
; RV64XVENTANACONDOPS-NEXT: mv a0, s1
; RV64XVENTANACONDOPS-NEXT: call bar at plt
; RV64XVENTANACONDOPS-NEXT: sllw s1, s1, s0
-; RV64XVENTANACONDOPS-NEXT: bnez a0, .LBB55_1
+; RV64XVENTANACONDOPS-NEXT: bnez a0, .LBB57_1
; RV64XVENTANACONDOPS-NEXT: # %bb.2: # %bb7
; RV64XVENTANACONDOPS-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64XVENTANACONDOPS-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3219,12 +3315,12 @@ define void @sextw_removal_maskcn(i1 %c, i32 signext %arg, i32 signext %arg1) no
; RV64XTHEADCONDMOV-NEXT: mv s1, a1
; RV64XTHEADCONDMOV-NEXT: andi a0, a0, 1
; RV64XTHEADCONDMOV-NEXT: th.mvnez s1, zero, a0
-; RV64XTHEADCONDMOV-NEXT: .LBB55_1: # %bb2
+; RV64XTHEADCONDMOV-NEXT: .LBB57_1: # %bb2
; RV64XTHEADCONDMOV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64XTHEADCONDMOV-NEXT: sext.w a0, s1
; RV64XTHEADCONDMOV-NEXT: call bar at plt
; RV64XTHEADCONDMOV-NEXT: sllw s1, s1, s0
-; RV64XTHEADCONDMOV-NEXT: bnez a0, .LBB55_1
+; RV64XTHEADCONDMOV-NEXT: bnez a0, .LBB57_1
; RV64XTHEADCONDMOV-NEXT: # %bb.2: # %bb7
; RV64XTHEADCONDMOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64XTHEADCONDMOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3241,12 +3337,12 @@ define void @sextw_removal_maskcn(i1 %c, i32 signext %arg, i32 signext %arg1) no
; RV32ZICOND-NEXT: mv s0, a2
; RV32ZICOND-NEXT: andi a0, a0, 1
; RV32ZICOND-NEXT: czero.nez s1, a1, a0
-; RV32ZICOND-NEXT: .LBB55_1: # %bb2
+; RV32ZICOND-NEXT: .LBB57_1: # %bb2
; RV32ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
; RV32ZICOND-NEXT: mv a0, s1
; RV32ZICOND-NEXT: call bar at plt
; RV32ZICOND-NEXT: sll s1, s1, s0
-; RV32ZICOND-NEXT: bnez a0, .LBB55_1
+; RV32ZICOND-NEXT: bnez a0, .LBB57_1
; RV32ZICOND-NEXT: # %bb.2: # %bb7
; RV32ZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ZICOND-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -3263,12 +3359,12 @@ define void @sextw_removal_maskcn(i1 %c, i32 signext %arg, i32 signext %arg1) no
; RV64ZICOND-NEXT: mv s0, a2
; RV64ZICOND-NEXT: andi a0, a0, 1
; RV64ZICOND-NEXT: czero.nez s1, a1, a0
-; RV64ZICOND-NEXT: .LBB55_1: # %bb2
+; RV64ZICOND-NEXT: .LBB57_1: # %bb2
; RV64ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
; RV64ZICOND-NEXT: mv a0, s1
; RV64ZICOND-NEXT: call bar at plt
; RV64ZICOND-NEXT: sllw s1, s1, s0
-; RV64ZICOND-NEXT: bnez a0, .LBB55_1
+; RV64ZICOND-NEXT: bnez a0, .LBB57_1
; RV64ZICOND-NEXT: # %bb.2: # %bb7
; RV64ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3294,19 +3390,19 @@ define i32 @setune_32(float %a, float %b, i32 %rs1, i32 %rs2) {
; RV32I-LABEL: setune_32:
; RV32I: # %bb.0:
; RV32I-NEXT: feq.s a2, fa0, fa1
-; RV32I-NEXT: beqz a2, .LBB56_2
+; RV32I-NEXT: beqz a2, .LBB58_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: .LBB56_2:
+; RV32I-NEXT: .LBB58_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: setune_32:
; RV64I: # %bb.0:
; RV64I-NEXT: feq.s a2, fa0, fa1
-; RV64I-NEXT: beqz a2, .LBB56_2
+; RV64I-NEXT: beqz a2, .LBB58_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB56_2:
+; RV64I-NEXT: .LBB58_2:
; RV64I-NEXT: ret
;
; RV64XVENTANACONDOPS-LABEL: setune_32:
@@ -3347,20 +3443,20 @@ define i64 @setune_64(float %a, float %b, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setune_64:
; RV32I: # %bb.0:
; RV32I-NEXT: feq.s a4, fa0, fa1
-; RV32I-NEXT: beqz a4, .LBB57_2
+; RV32I-NEXT: beqz a4, .LBB59_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: mv a1, a3
-; RV32I-NEXT: .LBB57_2:
+; RV32I-NEXT: .LBB59_2:
; RV32I-NEXT: ret
;
; RV64I-LABEL: setune_64:
; RV64I: # %bb.0:
; RV64I-NEXT: feq.s a2, fa0, fa1
-; RV64I-NEXT: beqz a2, .LBB57_2
+; RV64I-NEXT: beqz a2, .LBB59_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: .LBB57_2:
+; RV64I-NEXT: .LBB59_2:
; RV64I-NEXT: ret
;
; RV64XVENTANACONDOPS-LABEL: setune_64:
@@ -3409,15 +3505,15 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a3
-; RV32I-NEXT: beqz a0, .LBB58_2
+; RV32I-NEXT: beqz a0, .LBB60_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv s0, a2
-; RV32I-NEXT: .LBB58_2:
-; RV32I-NEXT: beqz a1, .LBB58_4
+; RV32I-NEXT: .LBB60_2:
+; RV32I-NEXT: beqz a1, .LBB60_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call bat@plt
-; RV32I-NEXT: .LBB58_4:
+; RV32I-NEXT: .LBB60_4:
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -3430,15 +3526,15 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a3
-; RV64I-NEXT: beqz a0, .LBB58_2
+; RV64I-NEXT: beqz a0, .LBB60_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv s0, a2
-; RV64I-NEXT: .LBB58_2:
-; RV64I-NEXT: beqz a1, .LBB58_4
+; RV64I-NEXT: .LBB60_2:
+; RV64I-NEXT: beqz a1, .LBB60_4
; RV64I-NEXT: # %bb.3:
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call bat@plt
-; RV64I-NEXT: .LBB58_4:
+; RV64I-NEXT: .LBB60_4:
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -3453,11 +3549,11 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV64XVENTANACONDOPS-NEXT: vt.maskc a2, a2, a0
; RV64XVENTANACONDOPS-NEXT: vt.maskcn s0, a3, a0
; RV64XVENTANACONDOPS-NEXT: or s0, s0, a2
-; RV64XVENTANACONDOPS-NEXT: beqz a1, .LBB58_2
+; RV64XVENTANACONDOPS-NEXT: beqz a1, .LBB60_2
; RV64XVENTANACONDOPS-NEXT: # %bb.1:
; RV64XVENTANACONDOPS-NEXT: mv a0, s0
; RV64XVENTANACONDOPS-NEXT: call bat@plt
-; RV64XVENTANACONDOPS-NEXT: .LBB58_2:
+; RV64XVENTANACONDOPS-NEXT: .LBB60_2:
; RV64XVENTANACONDOPS-NEXT: mv a0, s0
; RV64XVENTANACONDOPS-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XVENTANACONDOPS-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -3471,11 +3567,11 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV64XTHEADCONDMOV-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64XTHEADCONDMOV-NEXT: mv s0, a2
; RV64XTHEADCONDMOV-NEXT: th.mveqz s0, a3, a0
-; RV64XTHEADCONDMOV-NEXT: beqz a1, .LBB58_2
+; RV64XTHEADCONDMOV-NEXT: beqz a1, .LBB60_2
; RV64XTHEADCONDMOV-NEXT: # %bb.1:
; RV64XTHEADCONDMOV-NEXT: mv a0, s0
; RV64XTHEADCONDMOV-NEXT: call bat@plt
-; RV64XTHEADCONDMOV-NEXT: .LBB58_2:
+; RV64XTHEADCONDMOV-NEXT: .LBB60_2:
; RV64XTHEADCONDMOV-NEXT: mv a0, s0
; RV64XTHEADCONDMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XTHEADCONDMOV-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -3490,11 +3586,11 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV32ZICOND-NEXT: czero.eqz a2, a2, a0
; RV32ZICOND-NEXT: czero.nez s0, a3, a0
; RV32ZICOND-NEXT: or s0, s0, a2
-; RV32ZICOND-NEXT: beqz a1, .LBB58_2
+; RV32ZICOND-NEXT: beqz a1, .LBB60_2
; RV32ZICOND-NEXT: # %bb.1:
; RV32ZICOND-NEXT: mv a0, s0
; RV32ZICOND-NEXT: call bat@plt
-; RV32ZICOND-NEXT: .LBB58_2:
+; RV32ZICOND-NEXT: .LBB60_2:
; RV32ZICOND-NEXT: mv a0, s0
; RV32ZICOND-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ZICOND-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -3509,11 +3605,11 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
; RV64ZICOND-NEXT: czero.eqz a2, a2, a0
; RV64ZICOND-NEXT: czero.nez s0, a3, a0
; RV64ZICOND-NEXT: or s0, s0, a2
-; RV64ZICOND-NEXT: beqz a1, .LBB58_2
+; RV64ZICOND-NEXT: beqz a1, .LBB60_2
; RV64ZICOND-NEXT: # %bb.1:
; RV64ZICOND-NEXT: mv a0, s0
; RV64ZICOND-NEXT: call bat@plt
-; RV64ZICOND-NEXT: .LBB58_2:
+; RV64ZICOND-NEXT: .LBB60_2:
; RV64ZICOND-NEXT: mv a0, s0
; RV64ZICOND-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64ZICOND-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
>From 396f84b4d5baaa9f83ae217cf5014d2f28b606eb Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 28 Nov 2023 05:44:12 -0800
Subject: [PATCH 2/2] [RISCV] Generate bexti for (setcc eq (and x, c)) where c
is power of 2.
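Currently, LLVM can transform (setcc ne (and x, c), 0) into (bexti x, log2(c))
when c is a power of 2.
This patch transforms (select (setcc eq (and x, c), 0), T, F) into (select (setcc ne (and x, c), 0), F, T), so that the existing bexti pattern also applies to the eq form.
This is beneficial when c does not fit in 12 bits, since the mask would otherwise have to be materialized before the and.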
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 32 +++++++++++++++++++++
llvm/test/CodeGen/RISCV/condops.ll | 15 ++++------
2 files changed, 38 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a417b6fe05e59df..0e4939318b27490 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14182,11 +14182,43 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
}
+static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue Cond = N->getOperand(0);
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+ // Returns a select with inverted condition and swapped arms, else SDValue().
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
+ return SDValue();
+
+ // Replace (select (setcc eq (and x, C), 0), T, F) with
+ // (select (setcc ne (and x, C), 0), F, T) to generate BEXTI; C is a power of 2.
+ if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
+ (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getAPIntValue().isPowerOf2() &&
+ isNullConstant(RHS))
+ return DAG.getSelect(
+ DL, VT, DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE), False, True);
+ }
+ return SDValue();
+}
+
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
return Folded;
+ if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
+ return V;
+
if (Subtarget.hasShortForwardBranchOpt())
return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll
index 08b120d1251238d..97ba53a41ede4ec 100644
--- a/llvm/test/CodeGen/RISCV/condops.ll
+++ b/llvm/test/CodeGen/RISCV/condops.ll
@@ -100,8 +100,7 @@ define i64 @zero_singlebit1(i64 %rs1, i64 %rs2) {
;
; RV64XVENTANACONDOPS-LABEL: zero_singlebit1:
; RV64XVENTANACONDOPS: # %bb.0:
-; RV64XVENTANACONDOPS-NEXT: lui a2, 1
-; RV64XVENTANACONDOPS-NEXT: and a1, a1, a2
+; RV64XVENTANACONDOPS-NEXT: bexti a1, a1, 12
; RV64XVENTANACONDOPS-NEXT: vt.maskcn a0, a0, a1
; RV64XVENTANACONDOPS-NEXT: ret
;
@@ -114,16 +113,14 @@ define i64 @zero_singlebit1(i64 %rs1, i64 %rs2) {
;
; RV32ZICOND-LABEL: zero_singlebit1:
; RV32ZICOND: # %bb.0:
-; RV32ZICOND-NEXT: lui a3, 1
-; RV32ZICOND-NEXT: and a2, a2, a3
+; RV32ZICOND-NEXT: bexti a2, a2, 12
; RV32ZICOND-NEXT: czero.nez a0, a0, a2
; RV32ZICOND-NEXT: czero.nez a1, a1, a2
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: zero_singlebit1:
; RV64ZICOND: # %bb.0:
-; RV64ZICOND-NEXT: lui a2, 1
-; RV64ZICOND-NEXT: and a1, a1, a2
+; RV64ZICOND-NEXT: bexti a1, a1, 12
; RV64ZICOND-NEXT: czero.nez a0, a0, a1
; RV64ZICOND-NEXT: ret
%and = and i64 %rs2, 4096
@@ -3648,7 +3645,7 @@ define i64 @single_bit(i64 %x) {
;
; RV64XVENTANACONDOPS-LABEL: single_bit:
; RV64XVENTANACONDOPS: # %bb.0: # %entry
-; RV64XVENTANACONDOPS-NEXT: andi a1, a0, 1024
+; RV64XVENTANACONDOPS-NEXT: bexti a1, a0, 10
; RV64XVENTANACONDOPS-NEXT: vt.maskc a0, a0, a1
; RV64XVENTANACONDOPS-NEXT: ret
;
@@ -3661,14 +3658,14 @@ define i64 @single_bit(i64 %x) {
;
; RV32ZICOND-LABEL: single_bit:
; RV32ZICOND: # %bb.0: # %entry
-; RV32ZICOND-NEXT: andi a2, a0, 1024
+; RV32ZICOND-NEXT: bexti a2, a0, 10
; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
; RV32ZICOND-NEXT: czero.eqz a1, a1, a2
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: single_bit:
; RV64ZICOND: # %bb.0: # %entry
-; RV64ZICOND-NEXT: andi a1, a0, 1024
+; RV64ZICOND-NEXT: bexti a1, a0, 10
; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
; RV64ZICOND-NEXT: ret
entry:
More information about the llvm-commits
mailing list