[llvm] [RISCV] Implement shouldFoldMaskToVariableShiftPair (PR #166159)

Mon Nov 3 05:00:29 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

Author: Sudharsan Veeravalli (svs-quic)

<details>
<summary>Changes</summary>

Folding a mask into a variable shift pair (i.e. lowering `x & (-1 << y)` as `(x >> y) << y`) results in better code size, as long as the value is a scalar whose width is <= XLen.

Similar to https://github.com/llvm/llvm-project/pull/158069

---
Full diff: https://github.com/llvm/llvm-project/pull/166159.diff


3 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+9) 
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+2) 
- (added) llvm/test/CodeGen/RISCV/mask-variable-shift.ll (+132) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e0cf739f67d9b..5405bcff4056b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25320,3 +25320,12 @@ ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
   }
   return {};
 }
+
+bool RISCVTargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
+  EVT VT = Y.getValueType();
+
+  if (VT.isVector())
+    return false;
+
+  return VT.getScalarSizeInBits() <= Subtarget.getXLen();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 9e3e2a9443625..dd62a9cf6c9e2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -465,6 +465,8 @@ class RISCVTargetLowering : public TargetLowering {
 
   ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
 
+  bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
+
   /// Match a mask which "spreads" the leading elements of a vector evenly
   /// across the result.  Factor is the spread amount, and Index is the
   /// offset applied.
diff --git a/llvm/test/CodeGen/RISCV/mask-variable-shift.ll b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
new file mode 100644
index 0000000000000..4e73cee30ef08
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/mask-variable-shift.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=RV64
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; RV32-LABEL: mask_pair:
+; RV32:       # %bb.0:
+; RV32-NEXT:    srl a0, a0, a1
+; RV32-NEXT:    sll a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mask_pair:
+; RV64:       # %bb.0:
+; RV64-NEXT:    srlw a0, a0, a1
+; RV64-NEXT:    sllw a0, a0, a1
+; RV64-NEXT:    ret
+  %shl = shl nsw i32 -1, %y
+  %and = and i32 %shl, %x
+  ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; RV32-LABEL: mask_pair_64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, -1
+; RV32-NEXT:    addi a4, a2, -32
+; RV32-NEXT:    sll a3, a3, a2
+; RV32-NEXT:    bltz a4, .LBB1_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:    j .LBB1_3
+; RV32-NEXT:  .LBB1_2:
+; RV32-NEXT:    not a2, a2
+; RV32-NEXT:    lui a5, 524288
+; RV32-NEXT:    addi a5, a5, -1
+; RV32-NEXT:    srl a2, a5, a2
+; RV32-NEXT:    or a2, a3, a2
+; RV32-NEXT:  .LBB1_3:
+; RV32-NEXT:    srai a4, a4, 31
+; RV32-NEXT:    and a3, a4, a3
+; RV32-NEXT:    and a1, a2, a1
+; RV32-NEXT:    and a0, a3, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mask_pair_64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    srl a0, a0, a1
+; RV64-NEXT:    sll a0, a0, a1
+; RV64-NEXT:    ret
+  %shl = shl nsw i64 -1, %y
+  %and = and i64 %shl, %x
+  ret i64 %and
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; RV32-LABEL: mask_pair_128:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    lw a5, 0(a1)
+; RV32-NEXT:    lw a4, 4(a1)
+; RV32-NEXT:    lw a3, 8(a1)
+; RV32-NEXT:    lw a1, 12(a1)
+; RV32-NEXT:    lw a2, 0(a2)
+; RV32-NEXT:    li a6, -1
+; RV32-NEXT:    sw zero, 0(sp)
+; RV32-NEXT:    sw zero, 4(sp)
+; RV32-NEXT:    sw zero, 8(sp)
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    addi a7, sp, 16
+; RV32-NEXT:    sw a6, 16(sp)
+; RV32-NEXT:    sw a6, 20(sp)
+; RV32-NEXT:    sw a6, 24(sp)
+; RV32-NEXT:    sw a6, 28(sp)
+; RV32-NEXT:    srli a6, a2, 3
+; RV32-NEXT:    andi a6, a6, 12
+; RV32-NEXT:    sub a6, a7, a6
+; RV32-NEXT:    lw a7, 4(a6)
+; RV32-NEXT:    lw t0, 8(a6)
+; RV32-NEXT:    lw t1, 12(a6)
+; RV32-NEXT:    lw a6, 0(a6)
+; RV32-NEXT:    andi t2, a2, 31
+; RV32-NEXT:    xori t2, t2, 31
+; RV32-NEXT:    sll t1, t1, a2
+; RV32-NEXT:    srli t3, t0, 1
+; RV32-NEXT:    sll t0, t0, a2
+; RV32-NEXT:    srli t4, a7, 1
+; RV32-NEXT:    sll a7, a7, a2
+; RV32-NEXT:    sll a2, a6, a2
+; RV32-NEXT:    srli a6, a6, 1
+; RV32-NEXT:    srl t3, t3, t2
+; RV32-NEXT:    srl t4, t4, t2
+; RV32-NEXT:    srl a6, a6, t2
+; RV32-NEXT:    and a2, a2, a5
+; RV32-NEXT:    or a5, t1, t3
+; RV32-NEXT:    or t0, t0, t4
+; RV32-NEXT:    or a6, a7, a6
+; RV32-NEXT:    and a4, a6, a4
+; RV32-NEXT:    and a3, t0, a3
+; RV32-NEXT:    and a1, a5, a1
+; RV32-NEXT:    sw a2, 0(a0)
+; RV32-NEXT:    sw a4, 4(a0)
+; RV32-NEXT:    sw a3, 8(a0)
+; RV32-NEXT:    sw a1, 12(a0)
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mask_pair_128:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a5, -1
+; RV64-NEXT:    addi a4, a2, -64
+; RV64-NEXT:    sll a3, a5, a2
+; RV64-NEXT:    bltz a4, .LBB2_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:    j .LBB2_3
+; RV64-NEXT:  .LBB2_2:
+; RV64-NEXT:    not a2, a2
+; RV64-NEXT:    srli a5, a5, 1
+; RV64-NEXT:    srl a2, a5, a2
+; RV64-NEXT:    or a2, a3, a2
+; RV64-NEXT:  .LBB2_3:
+; RV64-NEXT:    srai a4, a4, 63
+; RV64-NEXT:    and a3, a4, a3
+; RV64-NEXT:    and a1, a2, a1
+; RV64-NEXT:    and a0, a3, a0
+; RV64-NEXT:    ret
+  %shl = shl nsw i128 -1, %y
+  %and = and i128 %shl, %x
+  ret i128 %and
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/166159