[llvm] [SelectionDAG][RISCV] Use VP_LOAD to widen MLOAD in type legalization when possible. (PR #140595)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 13:57:50 PDT 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/140595
From 9ead87c8321bd89c05e5d6489ba4c5ecc694b96a Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 19 May 2025 10:24:33 -0700
Subject: [PATCH 1/2] [SelectionDAG][RISCV] Use VP_LOAD to widen MLOAD in type
legalization when possible.
Padding the mask with zero elements doesn't work for scalable vectors.
Use VP_LOAD and limit the VL to the original element count instead.
This fixes a crash for Zve32x. The test was split since i64 isn't a valid
element type for Zve32x.
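A minimal reproducer, assuming the shape of the crash in #140198 (it mirrors
the smallest case in the new Zve32x RUN lines below):
; llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b crashed on this before the fix
define <vscale x 1 x i8> @masked_load_nxv1i8(ptr %a, <vscale x 1 x i1> %mask) {
  %load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> undef)
  ret <vscale x 1 x i8> %load
}
declare <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)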
Fixes #140198.
---
.../SelectionDAG/LegalizeVectorTypes.cpp | 29 ++++-
.../CodeGen/RISCV/rvv/masked-load-int-e64.ll | 47 ++++++++
.../test/CodeGen/RISCV/rvv/masked-load-int.ll | 104 ++++++++----------
3 files changed, 114 insertions(+), 66 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4e9a6942d6cd9..59a9dced6cf03 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6111,18 +6111,37 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
-
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDValue PassThru = GetWidenedVector(N->getPassThru());
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
+ EVT WideMaskVT =
+ EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
+ WidenVT.getVectorElementCount());
+
+ if (ExtType == ISD::NON_EXTLOAD &&
+ TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WidenVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ Mask = DAG.getInsertSubvector(dl, DAG.getUNDEF(WideMaskVT), Mask, 0);
+ SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
+ VT.getVectorElementCount());
+ SDValue NewLoad =
+ DAG.getLoadVP(N->getAddressingMode(), ISD::NON_EXTLOAD, WidenVT, dl,
+ N->getChain(), N->getBasePtr(), N->getOffset(), Mask, EVL,
+ N->getMemoryVT(), N->getMemOperand());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+ return NewLoad;
+ }
+
// The mask should be widened as well
- EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
- MaskVT.getVectorElementType(),
- WidenVT.getVectorNumElements());
Mask = ModifyToType(Mask, WideMaskVT, true);
SDValue Res = DAG.getMaskedLoad(
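For reference, a hedged sketch of what the new path produces in IR terms: the
narrow masked load is widened to a vp.load on the wider type, with the EVL
pinned to the original element count so the padding lanes are never touched
(names here are illustrative, not taken from the patch):
; <vscale x 1 x i8> masked.load widened to <vscale x 2 x i8> under Zve32x
define <vscale x 2 x i8> @widened_form(ptr %a, <vscale x 2 x i1> %widemask) {
  ; EVL = vscale x 1, the original element count
  %vscale = call i32 @llvm.vscale.i32()
  %v = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %a, <vscale x 2 x i1> %widemask, i32 %vscale)
  ret <vscale x 2 x i8> %v
}
declare <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)
declare i32 @llvm.vscale.i32()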
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll
new file mode 100644
index 0000000000000..493d55f6eefe6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
+ ret <vscale x 1 x i64> %load
+}
+declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
+
+define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+ ret <vscale x 2 x i64> %load
+}
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
+ ret <vscale x 4 x i64> %load
+}
+declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
+
+define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
+ ret <vscale x 8 x i64> %load
+}
+declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index f8c1d5e45bc28..d992669306fb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -1,51 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
define <vscale x 1 x i8> @masked_load_nxv1i8(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0), v0.t
-; CHECK-NEXT: ret
+; V-LABEL: masked_load_nxv1i8:
+; V: # %bb.0:
+; V-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; V-NEXT: vle8.v v8, (a0), v0.t
+; V-NEXT: ret
+;
+; ZVE32-LABEL: masked_load_nxv1i8:
+; ZVE32: # %bb.0:
+; ZVE32-NEXT: csrr a1, vlenb
+; ZVE32-NEXT: srli a1, a1, 3
+; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; ZVE32-NEXT: vle8.v v8, (a0), v0.t
+; ZVE32-NEXT: ret
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> undef)
ret <vscale x 1 x i8> %load
}
declare <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
define <vscale x 1 x i16> @masked_load_nxv1i16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0), v0.t
-; CHECK-NEXT: ret
+; V-LABEL: masked_load_nxv1i16:
+; V: # %bb.0:
+; V-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; V-NEXT: vle16.v v8, (a0), v0.t
+; V-NEXT: ret
+;
+; ZVE32-LABEL: masked_load_nxv1i16:
+; ZVE32: # %bb.0:
+; ZVE32-NEXT: csrr a1, vlenb
+; ZVE32-NEXT: srli a1, a1, 3
+; ZVE32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; ZVE32-NEXT: vle16.v v8, (a0), v0.t
+; ZVE32-NEXT: ret
%load = call <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr %a, i32 2, <vscale x 1 x i1> %mask, <vscale x 1 x i16> undef)
ret <vscale x 1 x i16> %load
}
declare <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
define <vscale x 1 x i32> @masked_load_nxv1i32(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
-; CHECK-NEXT: ret
+; V-LABEL: masked_load_nxv1i32:
+; V: # %bb.0:
+; V-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; V-NEXT: vle32.v v8, (a0), v0.t
+; V-NEXT: ret
+;
+; ZVE32-LABEL: masked_load_nxv1i32:
+; ZVE32: # %bb.0:
+; ZVE32-NEXT: csrr a1, vlenb
+; ZVE32-NEXT: srli a1, a1, 3
+; ZVE32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; ZVE32-NEXT: vle32.v v8, (a0), v0.t
+; ZVE32-NEXT: ret
%load = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr %a, i32 4, <vscale x 1 x i1> %mask, <vscale x 1 x i32> undef)
ret <vscale x 1 x i32> %load
}
declare <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
-define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
-; CHECK-NEXT: ret
- %load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
- ret <vscale x 1 x i64> %load
-}
-declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-
define <vscale x 2 x i8> @masked_load_nxv2i8(ptr %a, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv2i8:
; CHECK: # %bb.0:
@@ -79,17 +94,6 @@ define <vscale x 2 x i32> @masked_load_nxv2i32(ptr %a, <vscale x 2 x i1> %mask)
}
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
-; CHECK-NEXT: ret
- %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
- ret <vscale x 2 x i64> %load
-}
-declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-
define <vscale x 4 x i8> @masked_load_nxv4i8(ptr %a, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv4i8:
; CHECK: # %bb.0:
@@ -123,17 +127,6 @@ define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask)
}
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
-; CHECK-NEXT: ret
- %load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
- ret <vscale x 4 x i64> %load
-}
-declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
-
define <vscale x 8 x i8> @masked_load_nxv8i8(ptr %a, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv8i8:
; CHECK: # %bb.0:
@@ -167,17 +160,6 @@ define <vscale x 8 x i32> @masked_load_nxv8i32(ptr %a, <vscale x 8 x i1> %mask)
}
declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
-define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv8i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
-; CHECK-NEXT: ret
- %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
- ret <vscale x 8 x i64> %load
-}
-declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
-
define <vscale x 16 x i8> @masked_load_nxv16i8(ptr %a, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv16i8:
; CHECK: # %bb.0:
From 623dfc453d67f87e6ef1ada21fb5b531397c015b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 21 May 2025 13:57:31 -0700
Subject: [PATCH 2/2] fixup! Update fixed vector tests.
---
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll | 5 +----
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll | 5 +----
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
index 636af9535f6fa..76ce41655ea1e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
@@ -325,10 +325,7 @@ define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) {
define <7 x float> @masked_load_v7f32(ptr %a, <7 x i1> %mask) {
; CHECK-LABEL: masked_load_v7f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 127
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, a1
-; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <7 x float> @llvm.masked.load.v7f32(ptr %a, i32 8, <7 x i1> %mask, <7 x float> undef)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index f8f8a0c22d212..545c89495e621 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -334,10 +334,7 @@ define <256 x i8> @masked_load_v256i8(ptr %a, <256 x i1> %mask) {
define <7 x i8> @masked_load_v7i8(ptr %a, <7 x i1> %mask) {
; CHECK-LABEL: masked_load_v7i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 127
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, a1
-; CHECK-NEXT: vmand.mm v0, v0, v8
+; CHECK-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0), v0.t
; CHECK-NEXT: ret
%load = call <7 x i8> @llvm.masked.load.v7i8(ptr %a, i32 8, <7 x i1> %mask, <7 x i8> undef)
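For the fixed-vector cases above, a hedged sketch of why the mask constant
disappears: widening <7 x i1> to <8 x i1> previously required ANDing the mask
with 127 (0b0111111) to force lane 7 off, whereas a vp.load with EVL = 7 never
reads that lane in the first place (function name is illustrative):
define <8 x i8> @v7_as_vp_load(ptr %a, <8 x i1> %widemask) {
  %v = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %a, <8 x i1> %widemask, i32 7)
  ret <8 x i8> %v
}
declare <8 x i8> @llvm.vp.load.v8i8.p0(ptr, <8 x i1>, i32)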