[llvm] [SelectionDAG] Take passthru into account when widening ISD::MLOAD (PR #144170)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 15:51:31 PDT 2025
https://github.com/mshockwave created https://github.com/llvm/llvm-project/pull/144170
#140595 used vp.load in the cases where we need to widen masked.load. However, it didn't account for the passthru operand, so it might miscompile when the passthru is not undef. While we could simply avoid using vp.load to widen when the passthru is not undef, doing so would run into the exact same crash described in #140198. So for scalable vectors, this patch keeps using vp.load but manually merges the loaded result with the passthru when the latter is not undef.
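Roughly, the widened sequence now looks like this (a minimal pseudo-IR sketch; the patch actually builds SelectionDAG nodes, and the concrete types, value names, and EVL below are illustrative, not taken from the patch):

  ; original masked load with a non-undef passthru
  %v = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %p, i32 1, <vscale x 1 x i1> %m, <vscale x 1 x i8> %pt)

  ; after widening: vp.load with the widened mask, then merge the
  ; masked-off lanes back in from the widened passthru with vp.select
  %wide = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %p, <vscale x 2 x i1> %wide.m, i32 %evl)
  %res = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %wide.m, <vscale x 2 x i8> %wide, <vscale x 2 x i8> %wide.pt, i32 %evl)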
-------
I guess the reason we never ran into any problems (at least in LLVM) is that SLP always uses an undef passthru operand in masked.load. But other frontends like MLIR do use the passthru in masked.load to do things like padding.
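For reference, the passthru operand defines the value of the masked-off lanes, so a frontend can pad disabled lanes with a constant instead of leaving them undefined. A minimal example (not from the patch):

  ; lanes where %m is 0 read 0 from the passthru instead of being undefined
  %v = call <4 x i8> @llvm.masked.load.v4i8(ptr %p, i32 1, <4 x i1> %m, <4 x i8> zeroinitializer)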
From e21c656a078e55c115c55f7bd84d5f379dbd94be Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 13 Jun 2025 14:51:06 -0700
Subject: [PATCH 1/2] Pre-commit tests
---
.../rvv/fixed-vectors-masked-load-int.ll | 9 +++++++++
.../test/CodeGen/RISCV/rvv/masked-load-int.ll | 20 ++++++++++++++++++-
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index 545c89495e621..54a7ab2ef1174 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -341,3 +341,12 @@ define <7 x i8> @masked_load_v7i8(ptr %a, <7 x i1> %mask) {
ret <7 x i8> %load
}
+define <7 x i8> @masked_load_passthru_v7i8(ptr %a, <7 x i1> %mask) {
+; CHECK-LABEL: masked_load_passthru_v7i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <7 x i8> @llvm.masked.load.v7i8(ptr %a, i32 8, <7 x i1> %mask, <7 x i8> zeroinitializer)
+ ret <7 x i8> %load
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index d992669306fb1..3ae5cc6f785a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -21,7 +21,25 @@ define <vscale x 1 x i8> @masked_load_nxv1i8(ptr %a, <vscale x 1 x i1> %mask) no
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> undef)
ret <vscale x 1 x i8> %load
}
-declare <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
+
+define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1> %mask) nounwind {
+; V-LABEL: masked_load_passthru_nxv1i8:
+; V: # %bb.0:
+; V-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
+; V-NEXT: vmv.v.i v8, 0
+; V-NEXT: vle8.v v8, (a0), v0.t
+; V-NEXT: ret
+;
+; ZVE32-LABEL: masked_load_passthru_nxv1i8:
+; ZVE32: # %bb.0:
+; ZVE32-NEXT: csrr a1, vlenb
+; ZVE32-NEXT: srli a1, a1, 3
+; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; ZVE32-NEXT: vle8.v v8, (a0), v0.t
+; ZVE32-NEXT: ret
+ %load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
+ ret <vscale x 1 x i8> %load
+}
define <vscale x 1 x i16> @masked_load_nxv1i16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
; V-LABEL: masked_load_nxv1i16:
From 7cc1ea4a804d99330bfeb7b5bf227814d3e452a5 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 13 Jun 2025 15:39:48 -0700
Subject: [PATCH 2/2] [SelectionDAG] Take passthru into account when widening
ISD::MLOAD
---
.../SelectionDAG/LegalizeVectorTypes.cpp | 17 +++++++++++++++--
.../RISCV/rvv/fixed-vectors-masked-load-int.ll | 6 +++++-
llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll | 4 +++-
3 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f63fe17da51ff..c56cfec81acdd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6149,7 +6149,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
if (ExtType == ISD::NON_EXTLOAD &&
TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WidenVT) &&
- TLI.isTypeLegal(WideMaskVT)) {
+ TLI.isTypeLegal(WideMaskVT) &&
+      // If there is a passthru, we shouldn't use vp.load. However, the
+      // type legalizer struggles with masked.load on scalable vectors,
+      // so in that case we still use vp.load and manually merge the
+      // loaded result with the passthru using vp.select.
+ (N->getPassThru()->isUndef() || VT.isScalableVector())) {
Mask = DAG.getInsertSubvector(dl, DAG.getUNDEF(WideMaskVT), Mask, 0);
SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
VT.getVectorElementCount());
@@ -6157,12 +6162,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
DAG.getLoadVP(N->getAddressingMode(), ISD::NON_EXTLOAD, WidenVT, dl,
N->getChain(), N->getBasePtr(), N->getOffset(), Mask, EVL,
N->getMemoryVT(), N->getMemOperand());
+ SDValue NewVal = NewLoad;
+
+  // Manually merge the loaded result with the passthru using vp.select.
+ if (!N->getPassThru()->isUndef()) {
+ assert(WidenVT.isScalableVector());
+ NewVal =
+ DAG.getNode(ISD::VP_SELECT, dl, WidenVT, Mask, NewVal, PassThru, EVL);
+ }
// Modified the chain - switch anything that used the old chain to use
// the new one.
ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
- return NewLoad;
+ return NewVal;
}
// The mask should be widened as well
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index 54a7ab2ef1174..ed60d91308495 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -344,7 +344,11 @@ define <7 x i8> @masked_load_v7i8(ptr %a, <7 x i1> %mask) {
define <7 x i8> @masked_load_passthru_v7i8(ptr %a, <7 x i1> %mask) {
; CHECK-LABEL: masked_load_passthru_v7i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
+; CHECK-NEXT: li a1, 127
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vle8.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <7 x i8> @llvm.masked.load.v7i8(ptr %a, i32 8, <7 x i1> %mask, <7 x i8> zeroinitializer)
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index 3ae5cc6f785a8..75537406f3515 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -34,7 +34,9 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
; ZVE32: # %bb.0:
; ZVE32-NEXT: csrr a1, vlenb
; ZVE32-NEXT: srli a1, a1, 3
-; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; ZVE32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; ZVE32-NEXT: vmv.v.i v8, 0
+; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; ZVE32-NEXT: vle8.v v8, (a0), v0.t
; ZVE32-NEXT: ret
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)