[llvm] [RISCV] Implement shouldFoldMaskToVariableShiftPair (PR #166159)
Sudharsan Veeravalli via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 3 04:59:53 PST 2025
https://github.com/svs-quic created https://github.com/llvm/llvm-project/pull/166159
Folding a mask into a variable shift pair results in better code size as long as the type is a scalar that is <= XLen.
Similar to https://github.com/llvm/llvm-project/pull/158069
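For reference, the combine this hook enables rewrites an AND with a variable all-ones-shifted-left mask into a shift pair, roughly as sketched below (illustrative IR only; the function names are made up for the example):

  define i32 @masked(i32 %x, i32 %y) {
    %mask = shl i32 -1, %y        ; mask with the low %y bits clear
    %and  = and i32 %mask, %x
    ret i32 %and
  }

  ; becomes, in effect:

  define i32 @shift_pair(i32 %x, i32 %y) {
    %lo  = lshr i32 %x, %y        ; drop the low %y bits
    %res = shl i32 %lo, %y        ; shift back, leaving them cleared
    ret i32 %res
  }

This avoids materializing the -1 constant, as the updated CHECK lines in the test below show.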
From 59a6c04d6c1084e20ab0197fcd8aca3428769bb8 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Mon, 3 Nov 2025 17:59:16 +0530
Subject: [PATCH 1/2] Pre-commit test
---
.../test/CodeGen/RISCV/mask-variable-shift.ll | 135 ++++++++++++++++++
1 file changed, 135 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/mask-variable-shift.ll
diff --git a/llvm/test/CodeGen/RISCV/mask-variable-shift.ll b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
new file mode 100644
index 0000000000000..ec36a14498d59
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV64
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; RV32-LABEL: mask_pair:
+; RV32: # %bb.0:
+; RV32-NEXT: li a2, -1
+; RV32-NEXT: sll a1, a2, a1
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mask_pair:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: sllw a1, a2, a1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ret
+ %shl = shl nsw i32 -1, %y
+ %and = and i32 %shl, %x
+ ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; RV32-LABEL: mask_pair_64:
+; RV32: # %bb.0:
+; RV32-NEXT: li a3, -1
+; RV32-NEXT: addi a4, a2, -32
+; RV32-NEXT: sll a3, a3, a2
+; RV32-NEXT: bltz a4, .LBB1_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a2, a3
+; RV32-NEXT: j .LBB1_3
+; RV32-NEXT: .LBB1_2:
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: lui a5, 524288
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: srl a2, a5, a2
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: .LBB1_3:
+; RV32-NEXT: srai a4, a4, 31
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mask_pair_64:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: sll a1, a2, a1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ret
+ %shl = shl nsw i64 -1, %y
+ %and = and i64 %shl, %x
+ ret i64 %and
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; RV32-LABEL: mask_pair_128:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: lw a5, 0(a1)
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a3, 8(a1)
+; RV32-NEXT: lw a1, 12(a1)
+; RV32-NEXT: lw a2, 0(a2)
+; RV32-NEXT: li a6, -1
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: addi a7, sp, 16
+; RV32-NEXT: sw a6, 16(sp)
+; RV32-NEXT: sw a6, 20(sp)
+; RV32-NEXT: sw a6, 24(sp)
+; RV32-NEXT: sw a6, 28(sp)
+; RV32-NEXT: srli a6, a2, 3
+; RV32-NEXT: andi a6, a6, 12
+; RV32-NEXT: sub a6, a7, a6
+; RV32-NEXT: lw a7, 4(a6)
+; RV32-NEXT: lw t0, 8(a6)
+; RV32-NEXT: lw t1, 12(a6)
+; RV32-NEXT: lw a6, 0(a6)
+; RV32-NEXT: andi t2, a2, 31
+; RV32-NEXT: xori t2, t2, 31
+; RV32-NEXT: sll t1, t1, a2
+; RV32-NEXT: srli t3, t0, 1
+; RV32-NEXT: sll t0, t0, a2
+; RV32-NEXT: srli t4, a7, 1
+; RV32-NEXT: sll a7, a7, a2
+; RV32-NEXT: sll a2, a6, a2
+; RV32-NEXT: srli a6, a6, 1
+; RV32-NEXT: srl t3, t3, t2
+; RV32-NEXT: srl t4, t4, t2
+; RV32-NEXT: srl a6, a6, t2
+; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: or a5, t1, t3
+; RV32-NEXT: or t0, t0, t4
+; RV32-NEXT: or a6, a7, a6
+; RV32-NEXT: and a4, a6, a4
+; RV32-NEXT: and a3, t0, a3
+; RV32-NEXT: and a1, a5, a1
+; RV32-NEXT: sw a2, 0(a0)
+; RV32-NEXT: sw a4, 4(a0)
+; RV32-NEXT: sw a3, 8(a0)
+; RV32-NEXT: sw a1, 12(a0)
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mask_pair_128:
+; RV64: # %bb.0:
+; RV64-NEXT: li a5, -1
+; RV64-NEXT: addi a4, a2, -64
+; RV64-NEXT: sll a3, a5, a2
+; RV64-NEXT: bltz a4, .LBB2_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, a3
+; RV64-NEXT: j .LBB2_3
+; RV64-NEXT: .LBB2_2:
+; RV64-NEXT: not a2, a2
+; RV64-NEXT: srli a5, a5, 1
+; RV64-NEXT: srl a2, a5, a2
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: .LBB2_3:
+; RV64-NEXT: srai a4, a4, 63
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: and a1, a2, a1
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: ret
+ %shl = shl nsw i128 -1, %y
+ %and = and i128 %shl, %x
+ ret i128 %and
+}
From f3e1357532c98e246c15b5d859f19f10af1c6bf1 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Mon, 3 Nov 2025 18:23:01 +0530
Subject: [PATCH 2/2] [RISCV] Implement shouldFoldMaskToVariableShiftPair
Folding a mask into a variable shift pair results in better code size
as long as the type is a scalar that is <= XLen.
Similar to #158069
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 +++++++++
llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 ++
llvm/test/CodeGen/RISCV/mask-variable-shift.ll | 15 ++++++---------
3 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e0cf739f67d9b..5405bcff4056b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25320,3 +25320,12 @@ ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
}
return {};
}
+
+bool RISCVTargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return false;
+
+ return VT.getScalarSizeInBits() <= Subtarget.getXLen();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 9e3e2a9443625..dd62a9cf6c9e2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -465,6 +465,8 @@ class RISCVTargetLowering : public TargetLowering {
ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
+
/// Match a mask which "spreads" the leading elements of a vector evenly
/// across the result. Factor is the spread amount, and Index is the
/// offset applied.
diff --git a/llvm/test/CodeGen/RISCV/mask-variable-shift.ll b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
index ec36a14498d59..4e73cee30ef08 100644
--- a/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
+++ b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
@@ -5,16 +5,14 @@
define i32 @mask_pair(i32 %x, i32 %y) {
; RV32-LABEL: mask_pair:
; RV32: # %bb.0:
-; RV32-NEXT: li a2, -1
-; RV32-NEXT: sll a1, a2, a1
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: sll a0, a0, a1
; RV32-NEXT: ret
;
; RV64-LABEL: mask_pair:
; RV64: # %bb.0:
-; RV64-NEXT: li a2, -1
-; RV64-NEXT: sllw a1, a2, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: srlw a0, a0, a1
+; RV64-NEXT: sllw a0, a0, a1
; RV64-NEXT: ret
%shl = shl nsw i32 -1, %y
%and = and i32 %shl, %x
@@ -46,9 +44,8 @@ define i64 @mask_pair_64(i64 %x, i64 %y) {
;
; RV64-LABEL: mask_pair_64:
; RV64: # %bb.0:
-; RV64-NEXT: li a2, -1
-; RV64-NEXT: sll a1, a2, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: srl a0, a0, a1
+; RV64-NEXT: sll a0, a0, a1
; RV64-NEXT: ret
%shl = shl nsw i64 -1, %y
%and = and i64 %shl, %x