[llvm] [RISCV] Legalize misaligned unmasked vp.load/vp.store to vle8/vse8. (PR #167745)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 08:35:55 PST 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/167745
>From 2188f21028f32628e5a5c63d7c1c919463f74365 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 12 Nov 2025 11:19:41 -0800
Subject: [PATCH 1/2] [RISCV] Legalize misaligned unmasked vp.load/vp.store to
vle8/vse8.
If the unaligned-vector-mem feature is not enabled, we should not
generate vector loads/stores that are less aligned than their element
size. We already handle this for non-VP vector loads/stores.
This code has been in our downstream for about a year and a half,
after we found the vectorizer generating misaligned VP loads/stores.
I don't think that issue is unique to our downstream, but I'm not sure.
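
For illustration, here is a simplified form of the new vpload.ll test
below (written with splat-constant shorthand): an align-1 vp.load of
<vscale x 1 x i64> with an all-ones mask

  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(
              ptr align 1 %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)

is re-expressed as an equivalently-sized byte-typed load with the EVL
scaled by 8 (the element size in bytes), and selects to

  slli a1, a1, 3
  vsetvli zero, a1, e8, m1, ta, ma
  vle8.v v8, (a0)

instead of a misaligned vle64.v.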
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 101 +++++++++++++++++-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 3 +
.../rvv/fixed-vectors-shuffle-exact-vlen.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vpload.ll | 13 +++
llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 13 +++
5 files changed, 130 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5a081d54d0726..10ab1ec97794d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6853,6 +6853,97 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
Store->getMemOperand()->getFlags());
}
+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_LOAD via a correctly-aligned type.
+// If the load is already correctly-aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Load = cast<VPLoadSDNode>(Op);
+ assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Load->getMemoryVT(),
+ *Load->getMemOperand()))
+ return SDValue();
+
+ SDValue Mask = Load->getMask();
+
+ // FIXME: Handle masked loads somehow.
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV load type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+
+ SDValue VL = Load->getVectorLength();
+ VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+ DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+ SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+ DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
+ Load->getPointerInfo(), Load->getBaseAlign(),
+ Load->getMemOperand()->getFlags(), AAMDNodes());
+ return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_STORE via a correctly-aligned type.
+// It returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Store = cast<VPStoreSDNode>(Op);
+ assert(Store && Store->getValue().getValueType().isVector() &&
+ "Expected vector store");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Store->getMemoryVT(),
+ *Store->getMemOperand()))
+ return SDValue();
+
+ SDValue Mask = Store->getMask();
+
+ // FIXME: Handle masked stores somehow.
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue StoredVal = Store->getValue();
+ MVT VT = StoredVal.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV store type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+
+ SDValue VL = Store->getVectorLength();
+ VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+ DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+ StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+ LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
+ Store->getBaseAlign());
+
+ return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+ DAG.getUNDEF(Store->getBasePtr().getValueType()),
+ DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
+ NewVT, MMO, ISD::UNINDEXED);
+}
+
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
@@ -8408,13 +8499,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFixedLengthVectorStoreToRVV(Op, DAG);
return Op;
}
- case ISD::MLOAD:
case ISD::VP_LOAD:
+ if (SDValue V = expandUnalignedVPLoad(Op, DAG))
+ return V;
+ [[fallthrough]];
+ case ISD::MLOAD:
return lowerMaskedLoad(Op, DAG);
case ISD::VP_LOAD_FF:
return lowerLoadFF(Op, DAG);
- case ISD::MSTORE:
case ISD::VP_STORE:
+ if (SDValue V = expandUnalignedVPStore(Op, DAG))
+ return V;
+ [[fallthrough]];
+ case ISD::MSTORE:
return lowerMaskedStore(Op, DAG);
case ISD::VECTOR_COMPRESS:
return lowerVectorCompress(Op, DAG);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5cc427c867cfd..616664306bcab 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -576,6 +576,9 @@ class RISCVTargetLowering : public TargetLowering {
SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedVPLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedVPStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index c3fe6b335d3da..2b800c449953b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -180,8 +180,8 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
; CHECK-LABEL: shuffle1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 252
-; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vmv.v.i v0, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v10, v10, 1, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index edfa4a7560949..3a26af0279d50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -612,6 +612,19 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
ret <vscale x 16 x double> %lo
}
+define <vscale x 1 x i64> @unaligned_vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpload_nxv1i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(<vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret <vscale x 1 x i64> %load
+}
+
define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
; CHECK-LABEL: vpload_all_active_nxv8i8:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 3b406656a4dd6..982ec218e4688 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -511,6 +511,19 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
ret void
}
+define void @unaligned_vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpstore_nxv1i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, <vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret void
+}
+
define void @vpstore_all_active_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr) {
; CHECK-LABEL: vpstore_all_active_nxv8i8:
; CHECK: # %bb.0:
>From 4fcd63921ae87ffe5e31724097debc5410cf5a80 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 18 Nov 2025 08:35:23 -0800
Subject: [PATCH 2/2] fixup! Fix the mask VT and add asserts
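
Retyping the data to i8 elements changes the vector element count, so
the all-ones mask has to be rebuilt with the byte-typed count rather
than reusing the original mask type; roughly:

  data: nxv1i64, EVL       ->  nxv8i8, EVL * 8
  mask: nxv1i1 (all ones)  ->  nxv8i1 (all ones)

The new SelectionDAG asserts catch this kind of mask/data width mismatch.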
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +++++++++--
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +++++---
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 363c71d84694f..9084f727538ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9903,8 +9903,6 @@ SDValue SelectionDAG::getLoadVP(
MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
const MDNode *Ranges, bool IsExpanding) {
- assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
@@ -9926,6 +9924,11 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
SDValue Offset, SDValue Mask, SDValue EVL,
EVT MemVT, MachineMemOperand *MMO,
bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ assert(Mask.getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
@@ -10021,6 +10024,10 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
ISD::MemIndexedMode AM, bool IsTruncating,
bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ assert(Mask.getValueType().getVectorElementCount() ==
+ Val.getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 10ab1ec97794d..42badefc5d839 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6887,8 +6887,9 @@ SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
- DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
+ DAG.getAllOnesConstant(DL, MaskVT), VL,
Load->getPointerInfo(), Load->getBaseAlign(),
Load->getMemOperand()->getFlags(), AAMDNodes());
return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
@@ -6938,10 +6939,11 @@ SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
Store->getBaseAlign());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
DAG.getUNDEF(Store->getBasePtr().getValueType()),
- DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
- NewVT, MMO, ISD::UNINDEXED);
+ DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
+ ISD::UNINDEXED);
}
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,