[llvm] [RISCV] Select unsigned bitfield insert for XAndesPerf (PR #142737)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 00:55:30 PDT 2025
https://github.com/tclin914 created https://github.com/llvm/llvm-project/pull/142737
The XAndesPerf extension includes the unsigned bitfield extraction
instruction `NDS.BFOZ`, which can extract bits 0 to Len - 1,
place them starting at bit Msb, and zero-fill the remaining bits.
This patch handles the cases where Msb < Lsb.
Instruction Syntax:
nds.bfoz Rd, Rs1, Msb, Lsb
The operation is:
if Msb < Lsb:
Lenm1 = Lsb - Msb;
Rd[Lsb:Msb] = Rs1[Lenm1:0];
if (Lsb < (XLen -1)) Rd[XLen-1:Lsb+1]=0;
Rd[Msb-1:0]=0;
When Len == 1, it is a special case where the Msb is set to 0 instead of
being equal to the Lsb.
>From 4d5c0f4bc6a8c0d86f7d2aa8a9f7e1df98112b52 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Tue, 3 Jun 2025 14:58:36 +0800
Subject: [PATCH 1/2] [RISCV] Pre-commit
---
llvm/test/CodeGen/RISCV/rv32xandesperf.ll | 56 ++++++++++++
llvm/test/CodeGen/RISCV/rv64xandesperf.ll | 100 ++++++++++++++++++++++
2 files changed, 156 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
index 71473ab5dfb58..c1e31c74f06a7 100644
--- a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
@@ -2,6 +2,10 @@
; RUN: llc -O0 -mtriple=riscv32 -mattr=+xandesperf -verify-machineinstrs < %s \
; RUN: | FileCheck %s
+; NDS.BFOZ
+
+; MSB >= LSB
+
define i32 @bfoz_from_and_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_i32:
; CHECK: # %bb.0:
@@ -70,6 +74,58 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
ret i64 %shifted
}
+; MSB = 0
+
+define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 31
+; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 15
+ %masked = and i32 %shifted, 32768
+ ret i32 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 31
+; CHECK-NEXT: srli a0, a0, 13
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 31
+ %lshr = lshr i32 %shl, 13
+ ret i32 %lshr
+}
+
+; MSB < LSB
+
+define i32 @bfoz_from_and_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 20
+; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 12
+ %masked = and i32 %shifted, 16773120
+ ret i32 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 26
+; CHECK-NEXT: srli a0, a0, 7
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 26
+ %lshr = lshr i32 %shl, 7
+ ret i32 %lshr
+}
+
+; NDS.BFOS
+
+; MSB >= LSB
+
define i32 @bfos_from_ashr_shl_i32(i32 %x) {
; CHECK-LABEL: bfos_from_ashr_shl_i32:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
index 260d30be686dc..600b2edcfbbeb 100644
--- a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
@@ -2,6 +2,10 @@
; RUN: llc -mtriple=riscv64 -mattr=+xandesperf -verify-machineinstrs < %s \
; RUN: | FileCheck %s
+; NDS.BFOZ
+
+; MSB >= LSB
+
define i32 @bfoz_from_and_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_i32:
; CHECK: # %bb.0:
@@ -60,6 +64,102 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
ret i64 %shifted
}
+; MSB = 0
+
+define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 63
+; CHECK-NEXT: srli a0, a0, 48
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 15
+ %masked = and i32 %shifted, 32768
+ ret i32 %masked
+}
+
+define i64 @bfoz_from_and_shl_with_msb_zero_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 63
+; CHECK-NEXT: srli a0, a0, 15
+; CHECK-NEXT: ret
+ %shifted = shl i64 %x, 48
+ %masked = and i64 %shifted, 281474976710656
+ ret i64 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 63
+; CHECK-NEXT: srli a0, a0, 45
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 31
+ %lshr = lshr i32 %shl, 13
+ ret i32 %lshr
+}
+
+define i64 @bfoz_from_lshr_shl_with_msb_zero_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 63
+; CHECK-NEXT: srli a0, a0, 19
+; CHECK-NEXT: ret
+ %shl = shl i64 %x, 63
+ %lshr = lshr i64 %shl, 19
+ ret i64 %lshr
+}
+
+; MSB < LSB
+
+define i32 @bfoz_from_and_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 52
+; CHECK-NEXT: srli a0, a0, 40
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 12
+ %masked = and i32 %shifted, 16773120
+ ret i32 %masked
+}
+
+define i64 @bfoz_from_and_shl_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 52
+; CHECK-NEXT: srli a0, a0, 28
+; CHECK-NEXT: ret
+ %shifted = shl i64 %x, 24
+ %masked = and i64 %shifted, 68702699520
+ ret i64 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 58
+; CHECK-NEXT: srli a0, a0, 39
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 26
+ %lshr = lshr i32 %shl, 7
+ ret i32 %lshr
+}
+
+define i64 @bfoz_from_lshr_shl_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a0, a0, 40
+; CHECK-NEXT: srli a0, a0, 15
+; CHECK-NEXT: ret
+ %shl = shl i64 %x, 40
+ %lshr = lshr i64 %shl, 15
+ ret i64 %lshr
+}
+
+; NDS.BFOS
+
+; MSB >= LSB
+
define i32 @bfos_from_ashr_shl_i32(i32 %x) {
; CHECK-LABEL: bfos_from_ashr_shl_i32:
; CHECK: # %bb.0:
>From 27d670fc5abb4457314368b0e82acff7832a68d4 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Tue, 3 Jun 2025 17:13:52 +0800
Subject: [PATCH 2/2] [RISCV] Select unsigned bitfield insert for XAndesPerf
The XAndesPerf extension includes the unsigned bitfield extraction
instruction `NDS.BFOZ`, which can extract bits 0 to Len - 1,
place them starting at bit Msb, and zero-fill the remaining bits.
This patch handles the cases where Msb < Lsb.
Instruction Syntax:
nds.bfoz Rd, Rs1, Msb, Lsb
The operation is:
if Msb < Lsb:
Lenm1 = Lsb - Msb;
Rd[Lsb:Msb] = Rs1[Lenm1:0];
if (Lsb < (XLen -1)) Rd[XLen-1:Lsb+1]=0;
Rd[Msb-1:0]=0;
When Len == 1, it is a special case where the Msb is set to 0 instead of
being equal to the Lsb.
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 34 +++++++++++++++++++++
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 2 ++
llvm/test/CodeGen/RISCV/rv32xandesperf.ll | 12 +++-----
llvm/test/CodeGen/RISCV/rv64xandesperf.ll | 24 +++++----------
llvm/test/CodeGen/RISCV/rv64zba.ll | 3 +-
5 files changed, 49 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4f6aa41d1e03b..4c4b475ed3898 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -688,6 +688,23 @@ bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node, SDLoc DL,
return true;
}
+bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node, SDLoc DL,
+ MVT VT, SDValue X,
+ unsigned Msb,
+ unsigned Lsb) {
+ // Only supported with XAndesPerf at the moment.
+ if (!Subtarget->hasVendorXAndesPerf())
+ return false;
+
+ unsigned Opc = RISCV::NDS_BFOZ;
+
+ SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
+ CurDAG->getTargetConstant(Msb, DL, VT),
+ CurDAG->getTargetConstant(Lsb, DL, VT));
+ ReplaceNode(Node, Ubi);
+ return true;
+}
+
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
// Target does not support indexed loads.
if (!Subtarget->hasVendorXTHeadMemIdx())
@@ -1324,6 +1341,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
+ // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
+ // available.
+ // Transform (and (shl x, c2), c1)
+ // -> (<bfinsert> x, msb, lsb)
+ // e.g.
+ // (and (shl x, 12), 0x00fff000)
+ // If XLen = 32, Leading = 8 and C2 = 12, then
+ // Len = 32 - 8 - 12 = 12,
+ // Lsb = 32 - 8 - 1 = 23 and Msb = 12
+ // -> nds.bfoz x, 12, 23
+ const unsigned Len = XLen - Leading - C2;
+ const unsigned Lsb = XLen - Leading - 1;
+ // If Len is 1, the Msb will be 0 instead of C2.
+ unsigned Msb = Len == 1 ? 0 : C2;
+ if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
+ return;
+
// (srli (slli c2+c3), c3)
if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 11d62e5edad3f..f199c2031b9a9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -79,6 +79,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool trySignedBitfieldExtract(SDNode *Node);
bool tryUnsignedBitfieldExtract(SDNode *Node, SDLoc DL, MVT VT, SDValue X,
unsigned Msb, unsigned Lsb);
+ bool tryUnsignedBitfieldInsertInZero(SDNode *Node, SDLoc DL, MVT VT,
+ SDValue X, unsigned Msb, unsigned Lsb);
bool tryIndexedLoad(SDNode *Node);
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
diff --git a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
index c1e31c74f06a7..3996420d477b2 100644
--- a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
@@ -79,8 +79,7 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 31
-; CHECK-NEXT: srli a0, a0, 16
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 15
; CHECK-NEXT: ret
%shifted = shl i32 %x, 15
%masked = and i32 %shifted, 32768
@@ -90,8 +89,7 @@ define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 31
-; CHECK-NEXT: srli a0, a0, 13
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 18
; CHECK-NEXT: ret
%shl = shl i32 %x, 31
%lshr = lshr i32 %shl, 13
@@ -103,8 +101,7 @@ define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
define i32 @bfoz_from_and_shl_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_shl_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 20
-; CHECK-NEXT: srli a0, a0, 8
+; CHECK-NEXT: nds.bfoz a0, a0, 12, 23
; CHECK-NEXT: ret
%shifted = shl i32 %x, 12
%masked = and i32 %shifted, 16773120
@@ -114,8 +111,7 @@ define i32 @bfoz_from_and_shl_i32(i32 %x) {
define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_lshr_shl_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 26
-; CHECK-NEXT: srli a0, a0, 7
+; CHECK-NEXT: nds.bfoz a0, a0, 19, 24
; CHECK-NEXT: ret
%shl = shl i32 %x, 26
%lshr = lshr i32 %shl, 7
diff --git a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
index 600b2edcfbbeb..af7c300a92d1f 100644
--- a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
@@ -69,8 +69,7 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 63
-; CHECK-NEXT: srli a0, a0, 48
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 15
; CHECK-NEXT: ret
%shifted = shl i32 %x, 15
%masked = and i32 %shifted, 32768
@@ -80,8 +79,7 @@ define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
define i64 @bfoz_from_and_shl_with_msb_zero_i64(i64 %x) {
; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 63
-; CHECK-NEXT: srli a0, a0, 15
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 48
; CHECK-NEXT: ret
%shifted = shl i64 %x, 48
%masked = and i64 %shifted, 281474976710656
@@ -91,8 +89,7 @@ define i64 @bfoz_from_and_shl_with_msb_zero_i64(i64 %x) {
define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 63
-; CHECK-NEXT: srli a0, a0, 45
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 18
; CHECK-NEXT: ret
%shl = shl i32 %x, 31
%lshr = lshr i32 %shl, 13
@@ -102,8 +99,7 @@ define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
define i64 @bfoz_from_lshr_shl_with_msb_zero_i64(i64 %x) {
; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 63
-; CHECK-NEXT: srli a0, a0, 19
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 44
; CHECK-NEXT: ret
%shl = shl i64 %x, 63
%lshr = lshr i64 %shl, 19
@@ -115,8 +111,7 @@ define i64 @bfoz_from_lshr_shl_with_msb_zero_i64(i64 %x) {
define i32 @bfoz_from_and_shl_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_shl_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 52
-; CHECK-NEXT: srli a0, a0, 40
+; CHECK-NEXT: nds.bfoz a0, a0, 12, 23
; CHECK-NEXT: ret
%shifted = shl i32 %x, 12
%masked = and i32 %shifted, 16773120
@@ -126,8 +121,7 @@ define i32 @bfoz_from_and_shl_i32(i32 %x) {
define i64 @bfoz_from_and_shl_i64(i64 %x) {
; CHECK-LABEL: bfoz_from_and_shl_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 52
-; CHECK-NEXT: srli a0, a0, 28
+; CHECK-NEXT: nds.bfoz a0, a0, 24, 35
; CHECK-NEXT: ret
%shifted = shl i64 %x, 24
%masked = and i64 %shifted, 68702699520
@@ -137,8 +131,7 @@ define i64 @bfoz_from_and_shl_i64(i64 %x) {
define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_lshr_shl_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 58
-; CHECK-NEXT: srli a0, a0, 39
+; CHECK-NEXT: nds.bfoz a0, a0, 19, 24
; CHECK-NEXT: ret
%shl = shl i32 %x, 26
%lshr = lshr i32 %shl, 7
@@ -148,8 +141,7 @@ define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
define i64 @bfoz_from_lshr_shl_i64(i64 %x) {
; CHECK-LABEL: bfoz_from_lshr_shl_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a0, a0, 40
-; CHECK-NEXT: srli a0, a0, 15
+; CHECK-NEXT: nds.bfoz a0, a0, 25, 48
; CHECK-NEXT: ret
%shl = shl i64 %x, 40
%lshr = lshr i64 %shl, 15
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index a4d3b80edbd58..c93dc1f502f23 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -24,8 +24,7 @@ define i64 @slliuw(i64 %a) nounwind {
;
; RV64XANDESPERF-LABEL: slliuw:
; RV64XANDESPERF: # %bb.0:
-; RV64XANDESPERF-NEXT: slli a0, a0, 32
-; RV64XANDESPERF-NEXT: srli a0, a0, 31
+; RV64XANDESPERF-NEXT: nds.bfoz a0, a0, 1, 32
; RV64XANDESPERF-NEXT: ret
%conv1 = shl i64 %a, 1
%shl = and i64 %conv1, 8589934590
More information about the llvm-commits
mailing list