[llvm] [LLVM][CodeGen][SVE] List MVTs that are desirable for extending loads. (PR #149153)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 10:53:53 PDT 2025
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/149153
Extend AArch64TargetLowering::isVectorLoadExtDesirable to specify the set of MVTs for which load extension is desirable.
NOTE: The PR uncovered two instances where isVectorLoadExtDesirable was called incorrectly, passing in the load node rather than the expected extension node.
Fixes https://github.com/llvm/llvm-project/issues/148939
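For context, a minimal IR sketch of the behavioural difference (function names are illustrative; the i1 case mirrors the new tests at the end of the patch):

  ; Still desirable: the loaded element type is i8, so the load/sext pair
  ; can fold into an SVE extending load (e.g. ld1sb).
  define <vscale x 2 x i64> @ext_from_i8(ptr %addr) {
    %load = load <vscale x 2 x i8>, ptr %addr
    %ext = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
    ret <vscale x 2 x i64> %ext
  }

  ; Not desirable: the element type is i1 (an SVE predicate), for which no
  ; extending load exists, so the load and the sext are kept separate.
  define <vscale x 16 x i8> @ext_from_i1(ptr %addr) {
    %load = load <vscale x 16 x i1>, ptr %addr
    %ext = sext <vscale x 16 x i1> %load to <vscale x 16 x i8>
    ret <vscale x 16 x i8> %ext
  }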
>From ba77472bec74423a611a4d0014e17a230d03399f Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 16 Jul 2025 17:40:22 +0100
Subject: [PATCH] [LLVM][CodeGen][SVE] List MVTs that are desirable for
extending loads.
Extend AArch64TargetLowering::isVectorLoadExtDesirable to specify the
set of MVTs for which load extension is desirable.
NOTE: The PR uncovered two instances where isVectorLoadExtDesirable
was called incorrectly, passing in the load node rather than the
expected extension node.
Fixes https://github.com/llvm/llvm-project/issues/148939
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
.../Target/AArch64/AArch64ISelLowering.cpp | 4 +++-
.../AArch64/sve-intrinsics-ldst-ext.ll | 24 +++++++++++++++++++
3 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e8e4c9618bb2..e87e25f8f816d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7633,7 +7633,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue(GN0, 0).hasOneUse() &&
isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
- TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(N, 0)))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
@@ -15724,7 +15724,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
- TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(N, 0)))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f13a14d24649..2775bdb175556 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6439,7 +6439,9 @@ bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
}
}
- return true;
+ EVT PreExtScalarVT = ExtVal->getOperand(0).getValueType().getScalarType();
+ return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
+ PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
index 4153f0be611a1..9698f1a6768fd 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
@@ -231,3 +231,27 @@ define <vscale x 8 x i64> @sload_8i8_8i64(ptr %a) {
%aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
ret <vscale x 8 x i64> %aext
}
+
+; Ensure we don't try to promote a predicate load to a sign-extending load.
+define <vscale x 16 x i8> @sload_16i1_16i8(ptr %addr) {
+; CHECK-LABEL: sload_16i1_16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr p0, [x0]
+; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ret
+ %load = load <vscale x 16 x i1>, ptr %addr
+ %sext = sext <vscale x 16 x i1> %load to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %sext
+}
+
+; Ensure we don't try to promote a predicate load to a zero-extending load.
+define <vscale x 16 x i8> @zload_16i1_16i8(ptr %addr) {
+; CHECK-LABEL: zload_16i1_16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr p0, [x0]
+; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+ %load = load <vscale x 16 x i1>, ptr %addr
+ %zext = zext <vscale x 16 x i1> %load to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %zext
+}