[llvm] [RISCV] Implement shouldFoldMaskToVariableShiftPair (PR #166159)
Sudharsan Veeravalli via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 3 04:59:53 PST 2025
https://github.com/svs-quic created https://github.com/llvm/llvm-project/pull/166159
Folding a mask into a variable shift pair results in better code size as long as the type is a scalar that is <= XLen.
Similar to https://github.com/llvm/llvm-project/pull/158069
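For reference, the combine this hook enables rewrites an AND with a variable all-ones-shifted-left mask into a shift pair, roughly as sketched below (illustrative IR only; the function names are made up for the example):

  define i32 @masked(i32 %x, i32 %y) {
    %mask = shl i32 -1, %y        ; mask with the low %y bits clear
    %and  = and i32 %mask, %x
    ret i32 %and
  }

  ; becomes, in effect:

  define i32 @shift_pair(i32 %x, i32 %y) {
    %lo  = lshr i32 %x, %y        ; drop the low %y bits
    %res = shl i32 %lo, %y        ; shift back, leaving them cleared
    ret i32 %res
  }

This avoids materializing the -1 constant, as the updated CHECK lines in the test below show.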
From 59a6c04d6c1084e20ab0197fcd8aca3428769bb8 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Mon, 3 Nov 2025 17:59:16 +0530
Subject: [PATCH 1/2] Pre-commit test
---
.../test/CodeGen/RISCV/mask-variable-shift.ll | 135 ++++++++++++++++++
1 file changed, 135 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/mask-variable-shift.ll
diff --git a/llvm/test/CodeGen/RISCV/mask-variable-shift.ll b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
new file mode 100644
index 0000000000000..ec36a14498d59
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV64
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; RV32-LABEL: mask_pair:
+; RV32: # %bb.0:
+; RV32-NEXT: li a2, -1
+; RV32-NEXT: sll a1, a2, a1
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mask_pair:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: sllw a1, a2, a1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ret
+ %shl = shl nsw i32 -1, %y
+ %and = and i32 %shl, %x
+ ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; RV32-LABEL: mask_pair_64:
+; RV32: # %bb.0:
+; RV32-NEXT: li a3, -1
+; RV32-NEXT: addi a4, a2, -32
+; RV32-NEXT: sll a3, a3, a2
+; RV32-NEXT: bltz a4, .LBB1_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a2, a3
+; RV32-NEXT: j .LBB1_3
+; RV32-NEXT: .LBB1_2:
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: lui a5, 524288
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: srl a2, a5, a2
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: .LBB1_3:
+; RV32-NEXT: srai a4, a4, 31
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mask_pair_64:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: sll a1, a2, a1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ret
+ %shl = shl nsw i64 -1, %y
+ %and = and i64 %shl, %x
+ ret i64 %and
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; RV32-LABEL: mask_pair_128:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: lw a5, 0(a1)
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a3, 8(a1)
+; RV32-NEXT: lw a1, 12(a1)
+; RV32-NEXT: lw a2, 0(a2)
+; RV32-NEXT: li a6, -1
+; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: sw zero, 4(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: addi a7, sp, 16
+; RV32-NEXT: sw a6, 16(sp)
+; RV32-NEXT: sw a6, 20(sp)
+; RV32-NEXT: sw a6, 24(sp)
+; RV32-NEXT: sw a6, 28(sp)
+; RV32-NEXT: srli a6, a2, 3
+; RV32-NEXT: andi a6, a6, 12
+; RV32-NEXT: sub a6, a7, a6
+; RV32-NEXT: lw a7, 4(a6)
+; RV32-NEXT: lw t0, 8(a6)
+; RV32-NEXT: lw t1, 12(a6)
+; RV32-NEXT: lw a6, 0(a6)
+; RV32-NEXT: andi t2, a2, 31
+; RV32-NEXT: xori t2, t2, 31
+; RV32-NEXT: sll t1, t1, a2
+; RV32-NEXT: srli t3, t0, 1
+; RV32-NEXT: sll t0, t0, a2
+; RV32-NEXT: srli t4, a7, 1
+; RV32-NEXT: sll a7, a7, a2
+; RV32-NEXT: sll a2, a6, a2
+; RV32-NEXT: srli a6, a6, 1
+; RV32-NEXT: srl t3, t3, t2
+; RV32-NEXT: srl t4, t4, t2
+; RV32-NEXT: srl a6, a6, t2
+; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: or a5, t1, t3
+; RV32-NEXT: or t0, t0, t4
+; RV32-NEXT: or a6, a7, a6
+; RV32-NEXT: and a4, a6, a4
+; RV32-NEXT: and a3, t0, a3
+; RV32-NEXT: and a1, a5, a1
+; RV32-NEXT: sw a2, 0(a0)
+; RV32-NEXT: sw a4, 4(a0)
+; RV32-NEXT: sw a3, 8(a0)
+; RV32-NEXT: sw a1, 12(a0)
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mask_pair_128:
+; RV64: # %bb.0:
+; RV64-NEXT: li a5, -1
+; RV64-NEXT: addi a4, a2, -64
+; RV64-NEXT: sll a3, a5, a2
+; RV64-NEXT: bltz a4, .LBB2_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, a3
+; RV64-NEXT: j .LBB2_3
+; RV64-NEXT: .LBB2_2:
+; RV64-NEXT: not a2, a2
+; RV64-NEXT: srli a5, a5, 1
+; RV64-NEXT: srl a2, a5, a2
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: .LBB2_3:
+; RV64-NEXT: srai a4, a4, 63
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: and a1, a2, a1
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: ret
+ %shl = shl nsw i128 -1, %y
+ %and = and i128 %shl, %x
+ ret i128 %and
+}
From f3e1357532c98e246c15b5d859f19f10af1c6bf1 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Mon, 3 Nov 2025 18:23:01 +0530
Subject: [PATCH 2/2] [RISCV] Implement shouldFoldMaskToVariableShiftPair
Folding a mask into a variable shift pair results in better code size
as long as the type is a scalar that is <= XLen.
Similar to #158069
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 +++++++++
llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 ++
llvm/test/CodeGen/RISCV/mask-variable-shift.ll | 15 ++++++---------
3 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e0cf739f67d9b..5405bcff4056b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25320,3 +25320,12 @@ ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
}
return {};
}
+
+bool RISCVTargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
+ EVT VT = Y.getValueType();
+
+ if (VT.isVector())
+ return false;
+
+ return VT.getScalarSizeInBits() <= Subtarget.getXLen();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 9e3e2a9443625..dd62a9cf6c9e2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -465,6 +465,8 @@ class RISCVTargetLowering : public TargetLowering {
ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
+
/// Match a mask which "spreads" the leading elements of a vector evenly
/// across the result. Factor is the spread amount, and Index is the
/// offset applied.
diff --git a/llvm/test/CodeGen/RISCV/mask-variable-shift.ll b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
index ec36a14498d59..4e73cee30ef08 100644
--- a/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
+++ b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
@@ -5,16 +5,14 @@
define i32 @mask_pair(i32 %x, i32 %y) {
; RV32-LABEL: mask_pair:
; RV32: # %bb.0:
-; RV32-NEXT: li a2, -1
-; RV32-NEXT: sll a1, a2, a1
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: sll a0, a0, a1
; RV32-NEXT: ret
;
; RV64-LABEL: mask_pair:
; RV64: # %bb.0:
-; RV64-NEXT: li a2, -1
-; RV64-NEXT: sllw a1, a2, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: srlw a0, a0, a1
+; RV64-NEXT: sllw a0, a0, a1
; RV64-NEXT: ret
%shl = shl nsw i32 -1, %y
%and = and i32 %shl, %x
@@ -46,9 +44,8 @@ define i64 @mask_pair_64(i64 %x, i64 %y) {
;
; RV64-LABEL: mask_pair_64:
; RV64: # %bb.0:
-; RV64-NEXT: li a2, -1
-; RV64-NEXT: sll a1, a2, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: srl a0, a0, a1
+; RV64-NEXT: sll a0, a0, a1
; RV64-NEXT: ret
%shl = shl nsw i64 -1, %y
%and = and i64 %shl, %x