[llvm] 0e9b6d6 - [IA][RISCV] Detecting gap mask from a mask assembled by interleaveN intrinsics (#153510)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 09:22:50 PDT 2025
Author: Min-Yih Hsu
Date: 2025-08-15T09:22:47-07:00
New Revision: 0e9b6d6c8a111e214a3907fe97ccadf8f438d854
URL: https://github.com/llvm/llvm-project/commit/0e9b6d6c8a111e214a3907fe97ccadf8f438d854
DIFF: https://github.com/llvm/llvm-project/commit/0e9b6d6c8a111e214a3907fe97ccadf8f438d854.diff
LOG: [IA][RISCV] Detecting gap mask from a mask assembled by interleaveN intrinsics (#153510)
If the mask of a (fixed-vector) deinterleaved load is assembled by a
`vector.interleaveN` intrinsic, any intrinsic arguments that are
all-zeros are regarded as gaps.
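For illustration, the shape being recognized looks like the IR below, which
mirrors the new test added in this commit (%m, %ptr and the factor of 4 are
simply the values used in that test):
  ; The last two arguments of the interleave intrinsic are all-zeros, so
  ; fields 2 and 3 of the wide mask are gaps; %m covers fields 0 and 1.
  %interleaved.mask = call <8 x i1> @llvm.vector.interleave4(<2 x i1> %m, <2 x i1> %m, <2 x i1> splat (i1 false), <2 x i1> splat (i1 false))
  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)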
Added:
Modified:
llvm/lib/CodeGen/InterleavedAccessPass.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index bf128500f6005..93f6e39b56ab6 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -596,8 +596,26 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
- F && F == Factor && llvm::all_equal(IMI->args())) {
- return {IMI->getArgOperand(0), GapMask};
+ F && F == Factor) {
+ Value *RefArg = nullptr;
+ // Check if all the intrinsic arguments are the same, except those that
+ // are zeros, which we mark as gaps in the gap mask.
+ for (auto [Idx, Arg] : enumerate(IMI->args())) {
+ if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
+ GapMask.clearBit(Idx);
+ continue;
+ }
+
+ if (!RefArg)
+ RefArg = Arg;
+ else if (RefArg != Arg)
+ return {nullptr, GapMask};
+ }
+
+ // On very rare occasions, all the intrinsic arguments might be zeros,
+ // in which case we still want to return an all-zeros constant instead
+ // of nullptr.
+ return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
}
}
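For the example above (factor 4 with arguments {%m, %m, zeroinitializer, zeroinitializer}),
the new loop clears bits 2 and 3 of GapMask and keeps %m as RefArg, so getMask() returns
{%m, GapMask}; it only bails out with {nullptr, GapMask} when two non-zero arguments differ.
If every argument happens to be zero, the first (all-zeros) argument is returned so callers
still see a constant mask rather than nullptr.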
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 470e3095d418d..c426ee7b7d2b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -205,6 +205,23 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
ret {<4 x i32>, <4 x i32>} %res1
}
+; mask = %m, skip the last two fields.
+define {<2 x i32>, <2 x i32>} @vpload_factor4_interleaved_mask_intrinsic_skip_fields(ptr %ptr, <2 x i1> %m) {
+; CHECK-LABEL: vpload_factor4_interleaved_mask_intrinsic_skip_fields:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; CHECK-NEXT: vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+ %interleaved.mask = call <8 x i1> @llvm.vector.interleave4(<2 x i1> %m, <2 x i1> %m, <2 x i1> splat (i1 false), <2 x i1> splat (i1 false))
+ %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+ %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 0, i32 4>
+ %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 1, i32 5>
+ %res0 = insertvalue {<2 x i32>, <2 x i32>} undef, <2 x i32> %v0, 0
+ %res1 = insertvalue {<2 x i32>, <2 x i32>} %res0, <2 x i32> %v1, 1
+ ret {<2 x i32>, <2 x i32>} %res1
+}
+
define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) {
; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
; CHECK: # %bb.0:
@@ -532,8 +549,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a3, 12
; RV32-NEXT: lui a6, 12291
-; RV32-NEXT: lui a7, %hi(.LCPI26_0)
-; RV32-NEXT: addi a7, a7, %lo(.LCPI26_0)
+; RV32-NEXT: lui a7, %hi(.LCPI27_0)
+; RV32-NEXT: addi a7, a7, %lo(.LCPI27_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a5)
; RV32-NEXT: vmv.s.x v0, a3
@@ -618,12 +635,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
; RV32-NEXT: lui a7, 49164
-; RV32-NEXT: lui a1, %hi(.LCPI26_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_1)
+; RV32-NEXT: lui a1, %hi(.LCPI27_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_1)
; RV32-NEXT: lui t2, 3
; RV32-NEXT: lui t1, 196656
-; RV32-NEXT: lui a4, %hi(.LCPI26_3)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI26_3)
+; RV32-NEXT: lui a4, %hi(.LCPI27_3)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI27_3)
; RV32-NEXT: lui t0, 786624
; RV32-NEXT: li a5, 48
; RV32-NEXT: lui a6, 768
@@ -802,8 +819,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
-; RV32-NEXT: lui a1, %hi(.LCPI26_2)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_2)
+; RV32-NEXT: lui a1, %hi(.LCPI27_2)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_2)
; RV32-NEXT: lui a3, 3073
; RV32-NEXT: addi a3, a3, -1024
; RV32-NEXT: vmv.s.x v0, a3
@@ -867,16 +884,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v24
-; RV32-NEXT: lui a1, %hi(.LCPI26_4)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_4)
-; RV32-NEXT: lui a2, %hi(.LCPI26_5)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI26_5)
+; RV32-NEXT: lui a1, %hi(.LCPI27_4)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_4)
+; RV32-NEXT: lui a2, %hi(.LCPI27_5)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI27_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI26_7)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_7)
+; RV32-NEXT: lui a1, %hi(.LCPI27_7)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_7)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle16.v v10, (a1)
; RV32-NEXT: csrr a1, vlenb
@@ -904,14 +921,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
-; RV32-NEXT: lui a1, %hi(.LCPI26_6)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_6)
-; RV32-NEXT: lui a2, %hi(.LCPI26_8)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI26_8)
+; RV32-NEXT: lui a1, %hi(.LCPI27_6)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_6)
+; RV32-NEXT: lui a2, %hi(.LCPI27_8)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI27_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI26_9)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_9)
+; RV32-NEXT: lui a1, %hi(.LCPI27_9)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v6, (a1)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -998,8 +1015,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: li a4, 128
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vle64.v v8, (a3)
-; RV64-NEXT: lui a3, %hi(.LCPI26_0)
-; RV64-NEXT: addi a3, a3, %lo(.LCPI26_0)
+; RV64-NEXT: lui a3, %hi(.LCPI27_0)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI27_0)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: li a5, 61
@@ -1187,8 +1204,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT: lui a2, %hi(.LCPI26_1)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI26_1)
+; RV64-NEXT: lui a2, %hi(.LCPI27_1)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI27_1)
; RV64-NEXT: li a3, 192
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v6, (a2)
@@ -1222,8 +1239,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT: lui a2, %hi(.LCPI26_2)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI26_2)
+; RV64-NEXT: lui a2, %hi(.LCPI27_2)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI27_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: addi a1, a1, -2016
@@ -1307,12 +1324,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI26_3)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI26_3)
+; RV64-NEXT: lui a1, %hi(.LCPI27_3)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI27_3)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v20, (a1)
-; RV64-NEXT: lui a1, %hi(.LCPI26_4)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI26_4)
+; RV64-NEXT: lui a1, %hi(.LCPI27_4)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI27_4)
; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 77
@@ -1363,8 +1380,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
-; RV64-NEXT: lui a1, %hi(.LCPI26_5)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI26_5)
+; RV64-NEXT: lui a1, %hi(.LCPI27_5)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI27_5)
; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 61
@@ -1981,8 +1998,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI62_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI62_0)
+; RV32-NEXT: lui a1, %hi(.LCPI63_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI63_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -2057,8 +2074,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI63_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI63_0)
+; RV32-NEXT: lui a0, %hi(.LCPI64_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI64_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
@@ -2277,8 +2294,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI72_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI72_0)
+; RV32-NEXT: lui a1, %hi(.LCPI73_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI73_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11