[llvm] 94aa08a - [LLVM][CodeGen][SVE] Don't combine shifts at the expense of addressing modes. (#149873)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 24 02:54:52 PDT 2025
Author: Paul Walker
Date: 2025-07-24T10:54:48+01:00
New Revision: 94aa08a3b0e979e6977619064a27ca74bb15fcf6
URL: https://github.com/llvm/llvm-project/commit/94aa08a3b0e979e6977619064a27ca74bb15fcf6
DIFF: https://github.com/llvm/llvm-project/commit/94aa08a3b0e979e6977619064a27ca74bb15fcf6.diff
LOG: [LLVM][CodeGen][SVE] Don't combine shifts at the expense of addressing modes. (#149873)
Fixes https://github.com/llvm/llvm-project/issues/149654
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d7baee4387561..f2b69edd73b0f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18022,11 +18022,14 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
unsigned ShlAmt = C2->getZExtValue();
if (auto ShouldADD = *N->user_begin();
ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
- if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
- unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
- if ((1ULL << ShlAmt) == ByteVT &&
- isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
- return false;
+ if (auto Load = dyn_cast<LoadSDNode>(*ShouldADD->user_begin())) {
+ EVT MemVT = Load->getMemoryVT();
+
+ if (Load->getValueType(0).isScalableVector())
+ return (8ULL << ShlAmt) != MemVT.getScalarSizeInBits();
+
+ if (isIndexedLoadLegal(ISD::PRE_INC, MemVT))
+ return (8ULL << ShlAmt) != MemVT.getFixedSizeInBits();
}
}
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
index 9dfc8df703ce6..9666c5cc298ff 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -136,3 +136,18 @@ entry:
%0 = load i64, ptr %arrayidx, align 8
ret i64 %0
}
+
+define <2 x i64> @loadv2i64_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: loadv2i64_shr1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsr x8, x8, #1
+; CHECK-NEXT: ldr q0, [x2, x8, lsl #4]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 1
+ %arrayidx = getelementptr inbounds <2 x i64>, ptr %table, i64 %shr
+ %0 = load <2 x i64>, ptr %arrayidx, align 16
+ ret <2 x i64> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
index 05abfa319d389..29e94dd6c5242 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
@@ -268,6 +268,20 @@ define <vscale x 2 x bfloat> @ld1_nxv2bf16(ptr %addr, i64 %off) {
ret <vscale x 2 x bfloat> %val
}
+; Ensure we don't lose the free shift when using indexed addressing.
+define <vscale x 2 x bfloat> @ld1_nxv2bf16_double_shift(ptr %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv2bf16_double_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: lsr x8, x1, #6
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+ %off2 = lshr i64 %off, 6
+ %ptr = getelementptr inbounds bfloat, ptr %addr, i64 %off2
+ %val = load volatile <vscale x 2 x bfloat>, ptr %ptr
+ ret <vscale x 2 x bfloat> %val
+}
+
; LD1W
define <vscale x 4 x i32> @ld1_nxv4i32(ptr %addr, i64 %off) {
@@ -327,6 +341,20 @@ define <vscale x 2 x float> @ld1_nxv2f32(ptr %addr, i64 %off) {
ret <vscale x 2 x float> %val
}
+; Ensure we don't lose the free shift when using indexed addressing.
+define <vscale x 2 x float> @ld1_nxv2f32_double_shift(ptr %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv2f32_double_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: lsr x8, x1, #6
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+ %off2 = lshr i64 %off, 6
+ %ptr = getelementptr inbounds float, ptr %addr, i64 %off2
+ %val = load volatile <vscale x 2 x float>, ptr %ptr
+ ret <vscale x 2 x float> %val
+}
+
; LD1D
define <vscale x 2 x i64> @ld1_nxv2i64(ptr %addr, i64 %off) {
@@ -350,3 +378,17 @@ define <vscale x 2 x double> @ld1_nxv2f64(ptr %addr, i64 %off) {
%val = load volatile <vscale x 2 x double>, ptr %ptr
ret <vscale x 2 x double> %val
}
+
+; Ensure we don't lose the free shift when using indexed addressing.
+define <vscale x 2 x double> @ld1_nxv2f64_double_shift(ptr %addr, i64 %off) {
+; CHECK-LABEL: ld1_nxv2f64_double_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: lsr x8, x1, #6
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
+; CHECK-NEXT: ret
+ %off2 = lshr i64 %off, 6
+ %ptr = getelementptr inbounds double, ptr %addr, i64 %off2
+ %val = load volatile <vscale x 2 x double>, ptr %ptr
+ ret <vscale x 2 x double> %val
+}
More information about the llvm-commits
mailing list