[llvm] [RISCV] Add ISel pattern for generating QC_BREV32 (PR #145288)
Sudharsan Veeravalli via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 23 01:14:22 PDT 2025
https://github.com/svs-quic created https://github.com/llvm/llvm-project/pull/145288
The `QC_BREV32` instruction reverses the bit order of `rs1` and writes the result to `rd`.
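For reference, a minimal C++ sketch of the operation the instruction performs (the helper name `brev32` is illustrative, not taken from the Xqci spec):

  #include <cstdint>

  // Reverse the bit order of a 32-bit value: bit 0 moves to bit 31,
  // bit 1 to bit 30, and so on.
  static uint32_t brev32(uint32_t x) {
    uint32_t r = 0;
    for (int i = 0; i < 32; ++i)
      r = (r << 1) | ((x >> i) & 1);
    return r;
  }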
From c0930352c4d62513eb636a03e626c708ef0d59fc Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Mon, 23 Jun 2025 13:32:38 +0530
Subject: [PATCH] [RISCV] Add ISel pattern for generating QC_BREV32
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +-
llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 2 +
...cibm-cto-clo.ll => xqcibm-cto-clo-brev.ll} | 303 ++++++++++++++++++
3 files changed, 307 insertions(+), 1 deletion(-)
rename llvm/test/CodeGen/RISCV/{xqcibm-cto-clo.ll => xqcibm-cto-clo-brev.ll} (76%)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9e568052079ce..4f2bc4bf12c0a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -384,7 +384,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
? Legal
: Expand);
- if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
+ if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
+ !Subtarget.is64Bit()) {
setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
} else {
// Zbkb can use rev8+brev8 to implement bitreverse.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 3d0cad7884fdb..b52798edbe143 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1448,6 +1448,8 @@ def : Pat<(i32 (and GPRNoX0:$rs, 255)), (QC_EXTU GPRNoX0:$rs, 8, 0)>;
def : Pat<(i32 (and GPRNoX0:$rs, 511)), (QC_EXTU GPRNoX0:$rs, 9, 0)>;
def : Pat<(i32 (and GPRNoX0:$rs, 1023)), (QC_EXTU GPRNoX0:$rs, 10, 0)>;
def : Pat<(i32 (and GPRNoX0:$rs, 2047)), (QC_EXTU GPRNoX0:$rs, 11, 0)>;
+
+def : Pat<(i32 (bitreverse GPRNoX0:$rs1)), (QC_BREV32 GPRNoX0:$rs1)>;
} // Predicates = [HasVendorXqcibm, IsRV32]
// If Zbb is enabled sext.b/h is preferred since they are compressible
diff --git a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo.ll b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
similarity index 76%
rename from llvm/test/CodeGen/RISCV/xqcibm-cto-clo.ll
rename to llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
index fe2bcf00ba7d4..691c5bec7fb51 100644
--- a/llvm/test/CodeGen/RISCV/xqcibm-cto-clo.ll
+++ b/llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll
@@ -10,11 +10,17 @@ declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
+
declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
+declare i8 @llvm.bitreverse.i8(i8)
+declare i16 @llvm.bitreverse.i16(i16)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i64 @llvm.bitreverse.i64(i64)
+
define i8 @test_cttz_i8(i8 %a) nounwind {
; RV32I-LABEL: test_cttz_i8:
; RV32I: # %bb.0:
@@ -956,3 +962,300 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind {
%tmp = call i64 @llvm.ctlz.i64(i64 %1, i1 true)
ret i64 %tmp
}
+
+define i8 @brev_i8(i8 %a0) {
+; RV32I-LABEL: brev_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a1, a0, 15
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: slli a1, a1, 4
+; RV32I-NEXT: srli a0, a0, 28
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: andi a1, a0, 51
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: andi a0, a0, 51
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: andi a1, a0, 85
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: andi a0, a0, 85
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: brev_i8:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: andi a1, a0, 15
+; RV32ZBB-NEXT: slli a0, a0, 24
+; RV32ZBB-NEXT: slli a1, a1, 4
+; RV32ZBB-NEXT: srli a0, a0, 28
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: andi a1, a0, 51
+; RV32ZBB-NEXT: srli a0, a0, 2
+; RV32ZBB-NEXT: slli a1, a1, 2
+; RV32ZBB-NEXT: andi a0, a0, 51
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: andi a1, a0, 85
+; RV32ZBB-NEXT: srli a0, a0, 1
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: andi a0, a0, 85
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: ret
+;
+; RV32ZBBXQCIBM-LABEL: brev_i8:
+; RV32ZBBXQCIBM: # %bb.0:
+; RV32ZBBXQCIBM-NEXT: qc.brev32 a0, a0
+; RV32ZBBXQCIBM-NEXT: srli a0, a0, 24
+; RV32ZBBXQCIBM-NEXT: ret
+ %v0 = tail call i8 @llvm.bitreverse.i8(i8 %a0)
+ ret i8 %v0
+}
+
+define i16 @brev_i16(i16 %a0) {
+; RV32I-LABEL: brev_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: lui a2, 1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a2, a2, -241
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: lui a2, 3
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: lui a2, 5
+; RV32I-NEXT: addi a2, a2, 1365
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: brev_i16:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: rev8 a0, a0
+; RV32ZBB-NEXT: lui a1, 15
+; RV32ZBB-NEXT: srli a2, a0, 12
+; RV32ZBB-NEXT: addi a1, a1, 240
+; RV32ZBB-NEXT: and a1, a2, a1
+; RV32ZBB-NEXT: lui a2, 3
+; RV32ZBB-NEXT: srli a0, a0, 20
+; RV32ZBB-NEXT: addi a2, a2, 819
+; RV32ZBB-NEXT: andi a0, a0, -241
+; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: srli a1, a0, 2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: lui a2, 5
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
+;
+; RV32ZBBXQCIBM-LABEL: brev_i16:
+; RV32ZBBXQCIBM: # %bb.0:
+; RV32ZBBXQCIBM-NEXT: qc.brev32 a0, a0
+; RV32ZBBXQCIBM-NEXT: srli a0, a0, 16
+; RV32ZBBXQCIBM-NEXT: ret
+ %v0 = tail call i16 @llvm.bitreverse.i16(i16 %a0)
+ ret i16 %v0
+}
+
+define i32 @brev_i32(i32 %a0) {
+; RV32I-LABEL: brev_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: addi a2, a2, -256
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: lui a3, 61681
+; RV32I-NEXT: slli a2, a2, 8
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a3, a3, -241
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: lui a3, 349525
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: addi a3, a3, 1365
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: brev_i32:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: rev8 a0, a0
+; RV32ZBB-NEXT: lui a1, 61681
+; RV32ZBB-NEXT: srli a2, a0, 4
+; RV32ZBB-NEXT: addi a1, a1, -241
+; RV32ZBB-NEXT: and a2, a2, a1
+; RV32ZBB-NEXT: and a0, a0, a1
+; RV32ZBB-NEXT: lui a1, 209715
+; RV32ZBB-NEXT: addi a1, a1, 819
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a0, a2, a0
+; RV32ZBB-NEXT: srli a2, a0, 2
+; RV32ZBB-NEXT: and a0, a0, a1
+; RV32ZBB-NEXT: and a1, a2, a1
+; RV32ZBB-NEXT: lui a2, 349525
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: srli a1, a0, 1
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: slli a0, a0, 1
+; RV32ZBB-NEXT: or a0, a1, a0
+; RV32ZBB-NEXT: ret
+;
+; RV32ZBBXQCIBM-LABEL: brev_i32:
+; RV32ZBBXQCIBM: # %bb.0:
+; RV32ZBBXQCIBM-NEXT: qc.brev32 a0, a0
+; RV32ZBBXQCIBM-NEXT: ret
+ %v0 = tail call i32 @llvm.bitreverse.i32(i32 %a0)
+ ret i32 %v0
+}
+
+define i64 @brev_i64(i64 %a0) {
+; RV32I-LABEL: brev_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a2, a1, 8
+; RV32I-NEXT: lui a3, 16
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: slli a5, a1, 24
+; RV32I-NEXT: lui a6, 61681
+; RV32I-NEXT: srli a7, a0, 8
+; RV32I-NEXT: addi a3, a3, -256
+; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: and a7, a7, a3
+; RV32I-NEXT: or a4, a7, a4
+; RV32I-NEXT: lui a7, 209715
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: or a1, a5, a1
+; RV32I-NEXT: lui a5, 349525
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: addi a6, a6, -241
+; RV32I-NEXT: addi a7, a7, 819
+; RV32I-NEXT: addi a5, a5, 1365
+; RV32I-NEXT: slli a3, a3, 8
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: srli a2, a1, 4
+; RV32I-NEXT: and a1, a1, a6
+; RV32I-NEXT: srli a3, a0, 4
+; RV32I-NEXT: and a0, a0, a6
+; RV32I-NEXT: and a2, a2, a6
+; RV32I-NEXT: slli a1, a1, 4
+; RV32I-NEXT: and a3, a3, a6
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srli a2, a1, 2
+; RV32I-NEXT: and a1, a1, a7
+; RV32I-NEXT: srli a3, a0, 2
+; RV32I-NEXT: and a0, a0, a7
+; RV32I-NEXT: and a2, a2, a7
+; RV32I-NEXT: slli a1, a1, 2
+; RV32I-NEXT: and a3, a3, a7
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: or a1, a2, a1
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srli a2, a1, 1
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: srli a3, a0, 1
+; RV32I-NEXT: and a0, a0, a5
+; RV32I-NEXT: and a2, a2, a5
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: slli a4, a0, 1
+; RV32I-NEXT: or a0, a2, a1
+; RV32I-NEXT: or a1, a3, a4
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: brev_i64:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: rev8 a1, a1
+; RV32ZBB-NEXT: lui a2, 61681
+; RV32ZBB-NEXT: lui a3, 209715
+; RV32ZBB-NEXT: rev8 a0, a0
+; RV32ZBB-NEXT: srli a4, a1, 4
+; RV32ZBB-NEXT: addi a2, a2, -241
+; RV32ZBB-NEXT: srli a5, a0, 4
+; RV32ZBB-NEXT: and a4, a4, a2
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: and a5, a5, a2
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: lui a2, 349525
+; RV32ZBB-NEXT: addi a3, a3, 819
+; RV32ZBB-NEXT: addi a2, a2, 1365
+; RV32ZBB-NEXT: slli a1, a1, 4
+; RV32ZBB-NEXT: slli a0, a0, 4
+; RV32ZBB-NEXT: or a1, a4, a1
+; RV32ZBB-NEXT: or a0, a5, a0
+; RV32ZBB-NEXT: srli a4, a1, 2
+; RV32ZBB-NEXT: and a1, a1, a3
+; RV32ZBB-NEXT: srli a5, a0, 2
+; RV32ZBB-NEXT: and a0, a0, a3
+; RV32ZBB-NEXT: and a4, a4, a3
+; RV32ZBB-NEXT: slli a1, a1, 2
+; RV32ZBB-NEXT: and a3, a5, a3
+; RV32ZBB-NEXT: slli a0, a0, 2
+; RV32ZBB-NEXT: or a1, a4, a1
+; RV32ZBB-NEXT: or a0, a3, a0
+; RV32ZBB-NEXT: srli a3, a1, 1
+; RV32ZBB-NEXT: and a1, a1, a2
+; RV32ZBB-NEXT: srli a4, a0, 1
+; RV32ZBB-NEXT: and a0, a0, a2
+; RV32ZBB-NEXT: and a3, a3, a2
+; RV32ZBB-NEXT: slli a1, a1, 1
+; RV32ZBB-NEXT: and a2, a4, a2
+; RV32ZBB-NEXT: slli a4, a0, 1
+; RV32ZBB-NEXT: or a0, a3, a1
+; RV32ZBB-NEXT: or a1, a2, a4
+; RV32ZBB-NEXT: ret
+;
+; RV32ZBBXQCIBM-LABEL: brev_i64:
+; RV32ZBBXQCIBM: # %bb.0:
+; RV32ZBBXQCIBM-NEXT: qc.brev32 a2, a1
+; RV32ZBBXQCIBM-NEXT: qc.brev32 a1, a0
+; RV32ZBBXQCIBM-NEXT: mv a0, a2
+; RV32ZBBXQCIBM-NEXT: ret
+ %v0 = tail call i64 @llvm.bitreverse.i64(i64 %a0)
+ ret i64 %v0
+}
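A note on the `brev_i64` output above: on RV32 the i64 bitreverse legalizes into two 32-bit reversals with the register halves swapped, which the new pattern then selects as the two `qc.brev32` instructions plus `mv` seen in the check lines. A minimal C++ sketch, reusing the illustrative `brev32` helper from the top of this message:

  // i64 bitreverse in terms of 32-bit reversals: the reversed high word
  // becomes the new low word and vice versa.
  static uint64_t brev64(uint64_t x) {
    uint64_t lo = brev32(static_cast<uint32_t>(x >> 32));
    uint64_t hi = brev32(static_cast<uint32_t>(x));
    return (hi << 32) | lo;
  }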