[llvm] [RISCV] Use shiftop<rotl> in one of the BCLR patterns. (PR #164206)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 19 23:11:52 PDT 2025
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/164206
This allows us to remove the AND from the shift amount when DAG combine
has replaced (not (shl 1, X)) with (rotl -2, X). SimplifyDemandedBits
will often simplify the rotl case on its own, but not when the masked
shift amount has multiple users.
This will need to be rebased if #164050 goes in first.
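For reference, the bclr_i32_mask_multiple test added in this patch shows the
motivating IR. After DAG combine rewrites (not (shl 1, X)) to (rotl -2, X),
the masked shift amount still feeds the remaining shl used by the or, so it
has multiple users and SimplifyDemandedBits leaves the AND in place:

  %shamt_masked = and i32 %shamt, 63
  %shl = shl nuw i32 1, %shamt_masked
  %neg = xor i32 %shl, -1
  %and = and i32 %neg, %a
  %or = or i32 %b, %shl
  %c = add i32 %and, %or

Before this change, the Zbs+Zbb configuration kept the mask on the bclr
operand (andi a3, a2, 63 followed by bclr a0, a0, a3). With shiftop<rotl>
in the pattern, both bclr and bset use the unmasked shift amount:

  bclr a0, a0, a2
  bset a1, a1, a2
  add a0, a0, a1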
From 140b5712ab81fb070610136da736abb76a50d28a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sun, 19 Oct 2025 22:59:10 -0700
Subject: [PATCH 1/2] Pre-commit test
---
llvm/test/CodeGen/RISCV/rv32zbs.ll | 58 +++++++++++++++++++++++-------
llvm/test/CodeGen/RISCV/rv64zbs.ll | 46 ++++++++++++++++++++----
2 files changed, 86 insertions(+), 18 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll
index dcb70f88fd4ac..bb1dc186fae36 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbs.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll
@@ -45,6 +45,40 @@ define i32 @bclr_i32_no_mask(i32 %a, i32 %b) nounwind {
ret i32 %and1
}
+define i32 @bclr_i32_mask_multiple(i32 %a, i32 %b, i32 %shamt) nounwind {
+; RV32I-LABEL: bclr_i32_mask_multiple:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a3, 1
+; RV32I-NEXT: sll a2, a3, a2
+; RV32I-NEXT: not a3, a2
+; RV32I-NEXT: and a0, a3, a0
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBSNOZBB-LABEL: bclr_i32_mask_multiple:
+; RV32ZBSNOZBB: # %bb.0:
+; RV32ZBSNOZBB-NEXT: bclr a0, a0, a2
+; RV32ZBSNOZBB-NEXT: bset a1, a1, a2
+; RV32ZBSNOZBB-NEXT: add a0, a0, a1
+; RV32ZBSNOZBB-NEXT: ret
+;
+; RV32ZBSZBB-LABEL: bclr_i32_mask_multiple:
+; RV32ZBSZBB: # %bb.0:
+; RV32ZBSZBB-NEXT: andi a3, a2, 63
+; RV32ZBSZBB-NEXT: bclr a0, a0, a3
+; RV32ZBSZBB-NEXT: bset a1, a1, a2
+; RV32ZBSZBB-NEXT: add a0, a0, a1
+; RV32ZBSZBB-NEXT: ret
+ %shamt_masked = and i32 %shamt, 63
+ %shl = shl nuw i32 1, %shamt_masked
+ %neg = xor i32 %shl, -1
+ %and = and i32 %neg, %a
+ %or = or i32 %b, %shl
+ %c = add i32 %and, %or
+ ret i32 %c
+}
+
define i64 @bclr_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: bclr_i64:
; RV32I: # %bb.0:
@@ -301,17 +335,17 @@ define i64 @bext_i64(i64 %a, i64 %b) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a2, 63
; CHECK-NEXT: addi a4, a3, -32
-; CHECK-NEXT: bltz a4, .LBB12_2
+; CHECK-NEXT: bltz a4, .LBB13_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srl a0, a1, a3
-; CHECK-NEXT: j .LBB12_3
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: j .LBB13_3
+; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: srl a0, a0, a2
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: not a2, a3
; CHECK-NEXT: sll a1, a1, a2
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: .LBB12_3:
+; CHECK-NEXT: .LBB13_3:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: ret
@@ -789,17 +823,17 @@ define i64 @bset_trailing_ones_i64_mask(i64 %a) nounwind {
; CHECK-NEXT: li a3, -1
; CHECK-NEXT: addi a1, a2, -32
; CHECK-NEXT: sll a0, a3, a0
-; CHECK-NEXT: bltz a1, .LBB43_2
+; CHECK-NEXT: bltz a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: sll a2, a3, a2
-; CHECK-NEXT: j .LBB43_3
-; CHECK-NEXT: .LBB43_2:
+; CHECK-NEXT: j .LBB44_3
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: not a2, a2
; CHECK-NEXT: lui a3, 524288
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: srl a2, a3, a2
; CHECK-NEXT: or a2, a0, a2
-; CHECK-NEXT: .LBB43_3:
+; CHECK-NEXT: .LBB44_3:
; CHECK-NEXT: srai a1, a1, 31
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: not a1, a2
@@ -817,17 +851,17 @@ define i64 @bset_trailing_ones_i64_no_mask(i64 %a) nounwind {
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: addi a2, a0, -32
; CHECK-NEXT: sll a1, a1, a0
-; CHECK-NEXT: bltz a2, .LBB44_2
+; CHECK-NEXT: bltz a2, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: j .LBB44_3
-; CHECK-NEXT: .LBB44_2:
+; CHECK-NEXT: j .LBB45_3
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: not a0, a0
; CHECK-NEXT: lui a3, 524288
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: srl a0, a3, a0
; CHECK-NEXT: or a0, a1, a0
-; CHECK-NEXT: .LBB44_3:
+; CHECK-NEXT: .LBB45_3:
; CHECK-NEXT: srai a2, a2, 31
; CHECK-NEXT: and a2, a2, a1
; CHECK-NEXT: not a1, a0
diff --git a/llvm/test/CodeGen/RISCV/rv64zbs.ll b/llvm/test/CodeGen/RISCV/rv64zbs.ll
index b4edcf6cc55cf..d39018b93acdf 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbs.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbs.ll
@@ -2,9 +2,9 @@
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,RV64I
; RUN: llc -mtriple=riscv64 -mattr=+zbs -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS,RV64ZBSNOZBB
; RUN: llc -mtriple=riscv64 -mattr=+zbs,+zbb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS,RV64ZBSZBB
define signext i32 @bclr_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: bclr_i32:
@@ -110,6 +110,40 @@ define i64 @bclr_i64_no_mask(i64 %a, i64 %b) nounwind {
ret i64 %and1
}
+define i64 @bclr_i64_mask_multiple(i64 %a, i64 %b, i64 %shamt) nounwind {
+; RV64I-LABEL: bclr_i64_mask_multiple:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a3, 1
+; RV64I-NEXT: sll a2, a3, a2
+; RV64I-NEXT: not a3, a2
+; RV64I-NEXT: and a0, a3, a0
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBSNOZBB-LABEL: bclr_i64_mask_multiple:
+; RV64ZBSNOZBB: # %bb.0:
+; RV64ZBSNOZBB-NEXT: bclr a0, a0, a2
+; RV64ZBSNOZBB-NEXT: bset a1, a1, a2
+; RV64ZBSNOZBB-NEXT: add a0, a0, a1
+; RV64ZBSNOZBB-NEXT: ret
+;
+; RV64ZBSZBB-LABEL: bclr_i64_mask_multiple:
+; RV64ZBSZBB: # %bb.0:
+; RV64ZBSZBB-NEXT: andi a3, a2, 63
+; RV64ZBSZBB-NEXT: bclr a0, a0, a3
+; RV64ZBSZBB-NEXT: bset a1, a1, a2
+; RV64ZBSZBB-NEXT: add a0, a0, a1
+; RV64ZBSZBB-NEXT: ret
+ %shamt_masked = and i64 %shamt, 63
+ %shl = shl nuw i64 1, %shamt_masked
+ %neg = xor i64 %shl, -1
+ %and = and i64 %neg, %a
+ %or = or i64 %b, %shl
+ %c = add i64 %and, %or
+ ret i64 %c
+}
+
define signext i32 @bset_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: bset_i32:
; RV64I: # %bb.0:
@@ -372,19 +406,19 @@ define void @bext_i32_trunc(i32 signext %0, i32 signext %1) {
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: andi a0, a0, 1
-; RV64I-NEXT: beqz a0, .LBB19_2
+; RV64I-NEXT: beqz a0, .LBB20_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: ret
-; RV64I-NEXT: .LBB19_2:
+; RV64I-NEXT: .LBB20_2:
; RV64I-NEXT: tail bar
;
; RV64ZBS-LABEL: bext_i32_trunc:
; RV64ZBS: # %bb.0:
; RV64ZBS-NEXT: bext a0, a0, a1
-; RV64ZBS-NEXT: beqz a0, .LBB19_2
+; RV64ZBS-NEXT: beqz a0, .LBB20_2
; RV64ZBS-NEXT: # %bb.1:
; RV64ZBS-NEXT: ret
-; RV64ZBS-NEXT: .LBB19_2:
+; RV64ZBS-NEXT: .LBB20_2:
; RV64ZBS-NEXT: tail bar
%3 = shl i32 1, %1
%4 = and i32 %3, %0
From d3396a5412a1530cf8894daa2acb2f14fc29fc0f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sun, 19 Oct 2025 23:01:58 -0700
Subject: [PATCH 2/2] [RISCV] Use shiftop<rotl> in one of the BCLR patterns.
This allows us to remove the AND from the shift amount when DAG combine
has replaced (not (shl 1, X)) with (rotl -2, X). SimplifyDemandedBits
will often simplify the rotl case on its own, but not when the masked
shift amount has multiple users.
This will need to be rebased if #164050 goes in first.
---
llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 2 +-
llvm/test/CodeGen/RISCV/rv32zbs.ll | 20 ++++++-------------
llvm/test/CodeGen/RISCV/rv64zbs.ll | 24 ++++++++---------------
3 files changed, 15 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 57fbaa04ec687..f31ba17d6112d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -527,7 +527,7 @@ def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
let Predicates = [HasStdExtZbs] in {
def : Pat<(XLenVT (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)),
(BCLR GPR:$rs1, GPR:$rs2)>;
-def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
+def : Pat<(XLenVT (and (shiftop<rotl> -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
(BCLR GPR:$rs1, GPR:$rs2)>;
def : Pat<(XLenVT (or (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
(BSET GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll
index bb1dc186fae36..f9527ef79272b 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbs.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll
@@ -56,20 +56,12 @@ define i32 @bclr_i32_mask_multiple(i32 %a, i32 %b, i32 %shamt) nounwind {
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: ret
;
-; RV32ZBSNOZBB-LABEL: bclr_i32_mask_multiple:
-; RV32ZBSNOZBB: # %bb.0:
-; RV32ZBSNOZBB-NEXT: bclr a0, a0, a2
-; RV32ZBSNOZBB-NEXT: bset a1, a1, a2
-; RV32ZBSNOZBB-NEXT: add a0, a0, a1
-; RV32ZBSNOZBB-NEXT: ret
-;
-; RV32ZBSZBB-LABEL: bclr_i32_mask_multiple:
-; RV32ZBSZBB: # %bb.0:
-; RV32ZBSZBB-NEXT: andi a3, a2, 63
-; RV32ZBSZBB-NEXT: bclr a0, a0, a3
-; RV32ZBSZBB-NEXT: bset a1, a1, a2
-; RV32ZBSZBB-NEXT: add a0, a0, a1
-; RV32ZBSZBB-NEXT: ret
+; RV32ZBS-LABEL: bclr_i32_mask_multiple:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: bclr a0, a0, a2
+; RV32ZBS-NEXT: bset a1, a1, a2
+; RV32ZBS-NEXT: add a0, a0, a1
+; RV32ZBS-NEXT: ret
%shamt_masked = and i32 %shamt, 63
%shl = shl nuw i32 1, %shamt_masked
%neg = xor i32 %shl, -1
diff --git a/llvm/test/CodeGen/RISCV/rv64zbs.ll b/llvm/test/CodeGen/RISCV/rv64zbs.ll
index d39018b93acdf..d42bc8e128082 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbs.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbs.ll
@@ -2,9 +2,9 @@
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,RV64I
; RUN: llc -mtriple=riscv64 -mattr=+zbs -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS,RV64ZBSNOZBB
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS
; RUN: llc -mtriple=riscv64 -mattr=+zbs,+zbb -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS,RV64ZBSZBB
+; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBS
define signext i32 @bclr_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: bclr_i32:
@@ -121,20 +121,12 @@ define i64 @bclr_i64_mask_multiple(i64 %a, i64 %b, i64 %shamt) nounwind {
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
-; RV64ZBSNOZBB-LABEL: bclr_i64_mask_multiple:
-; RV64ZBSNOZBB: # %bb.0:
-; RV64ZBSNOZBB-NEXT: bclr a0, a0, a2
-; RV64ZBSNOZBB-NEXT: bset a1, a1, a2
-; RV64ZBSNOZBB-NEXT: add a0, a0, a1
-; RV64ZBSNOZBB-NEXT: ret
-;
-; RV64ZBSZBB-LABEL: bclr_i64_mask_multiple:
-; RV64ZBSZBB: # %bb.0:
-; RV64ZBSZBB-NEXT: andi a3, a2, 63
-; RV64ZBSZBB-NEXT: bclr a0, a0, a3
-; RV64ZBSZBB-NEXT: bset a1, a1, a2
-; RV64ZBSZBB-NEXT: add a0, a0, a1
-; RV64ZBSZBB-NEXT: ret
+; RV64ZBS-LABEL: bclr_i64_mask_multiple:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: bclr a0, a0, a2
+; RV64ZBS-NEXT: bset a1, a1, a2
+; RV64ZBS-NEXT: add a0, a0, a1
+; RV64ZBS-NEXT: ret
%shamt_masked = and i64 %shamt, 63
%shl = shl nuw i64 1, %shamt_masked
%neg = xor i64 %shl, -1