[llvm] 3b17d04 - [AArch64][SVE] Don't require 16-byte aligned SVE loads/stores with +strict-align (#119732)
Author: Benjamin Maxwell
Date: 2024-12-16T10:23:40Z
New Revision: 3b17d041dd775e033cca499f2a25548c8c22bb86
URL: https://github.com/llvm/llvm-project/commit/3b17d041dd775e033cca499f2a25548c8c22bb86
DIFF: https://github.com/llvm/llvm-project/commit/3b17d041dd775e033cca499f2a25548c8c22bb86.diff
LOG: [AArch64][SVE] Don't require 16-byte aligned SVE loads/stores with +strict-align (#119732)
Instead, allow any alignment >= the element size (in bytes). This is all
that is needed for (predicated) SVE vector loads and stores, even when
unaligned accesses are disabled.
See:
https://developer.arm.com/documentation/ddi0602/2024-09/Shared-Pseudocode/aarch64-functions-memory?lang=en#impl-aarch64.Mem.read.3
Specifically:
```
// Check alignment on size of element accessed, not overall access size.
constant integer alignment = if accdesc.ispair then size DIV 2 else size;
```
The `size` passed to `Mem` by SVE load/store instructions is the element
size.
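To make that rule concrete, here is a minimal standalone sketch (the helper name is illustrative, not Arm pseudocode or an LLVM API) of the per-element alignment requirement, ignoring the load/store-pair case:
```
#include <cstdint>

// Alignment is checked per element accessed, not per overall vector access.
constexpr uint64_t sveRequiredAlignBytes(uint64_t ElementSizeBits) {
  return ElementSizeBits / 8;
}

static_assert(sveRequiredAlignBytes(8) == 1, "ld1b/st1b: byte aligned");
static_assert(sveRequiredAlignBytes(16) == 2, "ld1h/st1h: 2-byte aligned");
static_assert(sveRequiredAlignBytes(32) == 4, "ld1w/st1w: 4-byte aligned");
static_assert(sveRequiredAlignBytes(64) == 8, "ld1d/st1d: 8-byte aligned");
```
The change to `allowsMisalignedMemoryAccesses` below applies exactly this element-size bound to scalable vector types.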
Added:
llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a86ee5a6b64d27..c19265613c706d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2569,6 +2569,19 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
 bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
     unsigned *Fast) const {
+
+  // Allow SVE loads/stores where the alignment >= the size of the element type,
+  // even with +strict-align. Predicated SVE loads/stores (e.g. ld1/st1), used
+  // for stores that come from IR, only require element-size alignment (even if
+  // unaligned accesses are disabled). Without this, these will be forced to
+  // have 16-byte alignment with +strict-align (and fail to lower as we don't
+  // yet support TLI.expandUnalignedLoad() and TLI.expandUnalignedStore()).
+  if (VT.isScalableVector()) {
+    unsigned ElementSizeBits = VT.getScalarSizeInBits();
+    if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))
+      return true;
+  }
+
   if (Subtarget->requiresStrictAlign())
     return false;
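For context, a simplified standalone sketch of the decision this hunk introduces (a hypothetical helper, not the LLVM API; the real hook performs further target checks after the strict-align test):
```
#include <cstdint>

// Accept a scalable-vector access whenever its alignment covers the element
// size in bytes, even under +strict-align; otherwise defer to strict-align.
bool allowsScalableAccess(uint64_t ElementSizeBits, uint64_t AlignBytes,
                          bool StrictAlign) {
  if (ElementSizeBits % 8 == 0 && AlignBytes >= ElementSizeBits / 8)
    return true;       // new early-accept path (covers SVE ld1/st1)
  return !StrictAlign; // simplified stand-in for the remaining checks
}

// Mirrors the tests added below:
//   allowsScalableAccess(64, 8, /*StrictAlign=*/true)  -> true  (nxv2i64, align 8)
//   allowsScalableAccess(64, 4, /*StrictAlign=*/true)  -> false (nxv2i64, align 4)
//   allowsScalableAccess(64, 4, /*StrictAlign=*/false) -> true
```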
diff --git a/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll b/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
new file mode 100644
index 00000000000000..c5b0651ab01d44
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s | FileCheck %s
+
+define void @nxv16i8(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-NEXT: ret
+ %l3 = load <vscale x 16 x i8>, ptr %ldptr, align 1
+ store <vscale x 16 x i8> %l3, ptr %stptr, align 1
+ ret void
+}
+
+define void @nxv8i16(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: st1h { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+ %l3 = load <vscale x 8 x i16>, ptr %ldptr, align 2
+ store <vscale x 8 x i16> %l3, ptr %stptr, align 2
+ ret void
+}
+
+define void @nxv4i32(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+ %l3 = load <vscale x 4 x i32>, ptr %ldptr, align 4
+ store <vscale x 4 x i32> %l3, ptr %stptr, align 4
+ ret void
+}
+
+define void @nxv2i64(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 8
+ store <vscale x 2 x i64> %l3, ptr %stptr, align 8
+ ret void
+}
+
+define void @nxv16i1(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr p0, [x0]
+; CHECK-NEXT: str p0, [x1]
+; CHECK-NEXT: ret
+ %l3 = load <vscale x 16 x i1>, ptr %ldptr, align 2
+ store <vscale x 16 x i1> %l3, ptr %stptr, align 2
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll b/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
new file mode 100644
index 00000000000000..27637800f751f4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: not --crash llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s 2>&1 | FileCheck %s --check-prefix=CHECK-FIXME
+
+; REQUIRES: asserts
+
+; FIXME: Support TLI.expandUnalignedLoad()/TLI.expandUnalignedStore() for SVE.
+; CHECK-FIXME: LLVM ERROR: Invalid size request on a scalable vector.
+
+define void @unaligned_nxv16i1(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: unaligned_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr p0, [x0]
+; CHECK-NEXT: str p0, [x1]
+; CHECK-NEXT: ret
+ %l3 = load <vscale x 16 x i1>, ptr %ldptr, align 1
+ store <vscale x 16 x i1> %l3, ptr %stptr, align 1
+ ret void
+}
+
+define void @unaligned_nxv2i64(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: unaligned_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+ %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 4
+ store <vscale x 2 x i64> %l3, ptr %stptr, align 4
+ ret void
+}