[llvm] [RISCV] Relax reversed mask's mask requirement in reverse to strided load/store combine (PR #180706)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 00:54:34 PST 2026
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/180706
We have combines for vp.reverse(vp.load) -> vp.strided.load stride=-1 and vp.store(vp.reverse) -> vp.strided.store stride=-1.
If the load or store is masked, the mask also needs to be a vp.reverse with the same EVL. However, we additionally require that the mask's vp.reverse is itself unmasked (has an all-ones mask).
vp.reverse's mask only sets masked-off lanes to poison, and doesn't affect the permutation of the elements. So given that those lanes are poison, I believe the combine is valid for any mask, not just all-ones.
This is split off from another patch I plan on posting to generalize those combines to vector.splice+vector.reverse patterns, as part of #172961
>From 5e4c5274e6a8305dee244e7d71fd3e32c3d7741b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 10 Feb 2026 16:28:05 +0800
Subject: [PATCH 1/2] Precommit tests
---
.../RISCV/rvv/vp-combine-reverse-load.ll | 26 ++++++++++++++++
.../RISCV/rvv/vp-combine-store-reverse.ll | 31 +++++++++++++------
2 files changed, 48 insertions(+), 9 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
index 75c60ad9382b5..682aeffdcfdb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
@@ -33,6 +33,32 @@ define <vscale x 2 x float> @test_load_mask_is_vp_reverse(<vscale x 2 x float>*
ret <vscale x 2 x float> %rev
}
+define <vscale x 2 x float> @test_load_mask_is_vp_reverse_with_mask(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 zeroext %evl) {
+; CHECK-LABEL: test_load_mask_is_vp_reverse_with_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: vrsub.vx v10, v10, a1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: vle32.v v9, (a0), v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v10, v8, a1
+; CHECK-NEXT: vrgather.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 %evl)
+ %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %loadmask, i32 %evl)
+ %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x float> %rev
+}
+
define <vscale x 2 x float> @test_load_mask_not_all_one(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %notallones, i32 zeroext %evl) {
; CHECK-LABEL: test_load_mask_not_all_one:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll
index 5fa29dac69601..190ef968c50be 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll
@@ -33,18 +33,31 @@ define void @test_store_mask_is_vp_reverse(<vscale x 2 x float> %val, <vscale x
ret void
}
-define void @test_store_mask_not_all_one(<vscale x 2 x float> %val, <vscale x 2 x float>* %ptr, <vscale x 2 x i1> %notallones, i32 zeroext %evl) {
-; CHECK-LABEL: test_store_mask_not_all_one:
+define void @test_store_mask_is_vp_reverse_with_mask(<vscale x 2 x float> %val, <vscale x 2 x float>* %ptr, <vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 zeroext %evl) {
+; CHECK-LABEL: test_store_mask_is_vp_reverse_with_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vrsub.vx v9, v9, a1, v0.t
-; CHECK-NEXT: vrgather.vv v10, v8, v9, v0.t
-; CHECK-NEXT: vse32.v v10, (a0), v0.t
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v11, v0.t
+; CHECK-NEXT: vrsub.vx v11, v11, a1, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v10, v11, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v10, v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vse32.v v9, (a0), v0.t
; CHECK-NEXT: ret
- %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x i1> %notallones, i32 %evl)
- call void @llvm.vp.store.nxv2f32.p0nxv2f32(<vscale x 2 x float> %rev, <vscale x 2 x float>* %ptr, <vscale x 2 x i1> %notallones, i32 %evl)
+ %storemask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 %evl)
+ %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ call void @llvm.vp.store.nxv2f32.p0nxv2f32(<vscale x 2 x float> %rev, <vscale x 2 x float>* %ptr, <vscale x 2 x i1> %storemask, i32 %evl)
ret void
}
>From 31581b45f5fcc11d1433e9137d7641dd0a14c1b8 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 10 Feb 2026 16:31:55 +0800
Subject: [PATCH 2/2] [RISCV] Relax reversed mask's mask requirement in reverse
to strided load/store combine
We have combines for vp.reverse(vp.load) -> vp.strided.load stride=-1 and vp.store(vp.reverse) -> vp.strided.store stride=-1.
If the load or store is masked, the mask also needs to be a vp.reverse with the same EVL. However, we additionally require that the mask's vp.reverse is itself unmasked (has an all-ones mask).
vp.reverse's mask only sets masked-off lanes to poison, and doesn't affect the permutation of the elements. So given that those lanes are poison, I believe the combine is valid for any mask, not just all-ones.
This is split off from another patch I plan on posting to generalize those combines to vector.splice+vector.reverse patterns, as part of #172961
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 ++---
.../RISCV/rvv/vp-combine-reverse-load.ll | 22 +++++------------
.../RISCV/rvv/vp-combine-store-reverse.ll | 24 +++++--------------
3 files changed, 14 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 430946ebc2411..fa4a76cff1c66 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19278,9 +19278,8 @@ static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
// If Mask is all ones, then load is unmasked and can be reversed.
if (!isOneOrOneSplat(LoadMask)) {
// If the mask is not all ones, we can reverse the load if the mask was also
- // reversed by an unmasked vp.reverse with the same EVL.
+ // reversed by a vp.reverse with the same EVL.
if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
- !isOneOrOneSplat(LoadMask.getOperand(1)) ||
LoadMask.getOperand(2) != VPLoad->getVectorLength())
return SDValue();
LoadMask = LoadMask.getOperand(0);
@@ -19338,9 +19337,8 @@ static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
// If Mask is all ones, then load is unmasked and can be reversed.
if (!isOneOrOneSplat(StoreMask)) {
// If the mask is not all ones, we can reverse the store if the mask was
- // also reversed by an unmasked vp.reverse with the same EVL.
+ // also reversed by a vp.reverse with the same EVL.
if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
- !isOneOrOneSplat(StoreMask.getOperand(1)) ||
StoreMask.getOperand(2) != VPStore->getVectorLength())
return SDValue();
StoreMask = StoreMask.getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
index 682aeffdcfdb0..73700ecda5060 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
@@ -36,22 +36,12 @@ define <vscale x 2 x float> @test_load_mask_is_vp_reverse(<vscale x 2 x float>*
define <vscale x 2 x float> @test_load_mask_is_vp_reverse_with_mask(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 zeroext %evl) {
; CHECK-LABEL: test_load_mask_is_vp_reverse_with_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vid.v v10, v0.t
-; CHECK-NEXT: vrsub.vx v10, v10, a1, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t
-; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
-; CHECK-NEXT: vle32.v v9, (a0), v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: vrsub.vx v10, v8, a1
-; CHECK-NEXT: vrgather.vv v8, v9, v10
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: addi a0, a0, -4
+; CHECK-NEXT: li a2, -4
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vlse32.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
%loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 %evl)
%load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %loadmask, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll
index 190ef968c50be..a2d393d3f6ea3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll
@@ -36,24 +36,12 @@ define void @test_store_mask_is_vp_reverse(<vscale x 2 x float> %val, <vscale x
define void @test_store_mask_is_vp_reverse_with_mask(<vscale x 2 x float> %val, <vscale x 2 x float>* %ptr, <vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 zeroext %evl) {
; CHECK-LABEL: test_store_mask_is_vp_reverse_with_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vid.v v11, v0.t
-; CHECK-NEXT: vrsub.vx v11, v11, a1, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v12, v10, v11, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vse32.v v9, (a0), v0.t
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: addi a0, a0, -4
+; CHECK-NEXT: li a2, -4
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsse32.v v8, (a0), a2, v0.t
; CHECK-NEXT: ret
%storemask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %revmask, i32 %evl)
%rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x i1> splat (i1 true), i32 %evl)
More information about the llvm-commits
mailing list