[llvm] [AArch64] Increase inline memmove limit to 16 stored registers (PR #111848)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 07:54:43 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/111848
The memcpy inline limit has been 16 for a long time; this patch makes the memmove inline limit the same, allowing small constant-sized memmoves to be emitted inline. The limit of 16 is the number of registers stored, which equates to a limit of 256 bytes.
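For illustration, a minimal IR sketch mirroring the t256/t257 tests added below (the function name @move256 is just a placeholder for this example): a constant 256-byte memmove sits exactly at the new limit of sixteen 128-bit register stores and can now be expanded inline, while anything larger still lowers to a call to the memmove library function.

    define void @move256(ptr %dst, ptr %src) {
    entry:
      ; 256 bytes == 16 x 128-bit stores, exactly at the new inline limit;
      ; a 257-byte memmove would still become a "bl memmove".
      call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 256, i1 false)
      ret void
    }

    declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)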
From d4e59da5e09d7dc4f14ce22ca57fda52c653c5a3 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 10 Oct 2024 14:41:54 +0100
Subject: [PATCH] [AArch64] Increase inline memmove limit to 16 stored
registers
The memcpy inline limit has been 16 for a long time; this patch makes the
memmove inline limit the same, allowing small constant-sized memmoves to be
emitted inline.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
.../AArch64/GlobalISel/inline-memmove.mir | 169 +++++++++++-------
llvm/test/CodeGen/AArch64/memmove-inline.ll | 122 +++++++++++++
3 files changed, 226 insertions(+), 68 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/memmove-inline.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c1aefee3793c96..2fa33cfa025696 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1143,7 +1143,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = 4;
- MaxStoresPerMemmove = 4;
+ MaxStoresPerMemmove =
+ Subtarget->requiresStrictAlign() ? MaxStoresPerMemmoveOptSize : 16;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp =
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
index f31b64ece89572..27d09b23625675 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
@@ -61,11 +61,12 @@ body: |
; CHECK-LABEL: name: test_memmove1
; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
- ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = COPY $x2
@@ -83,23 +84,24 @@ body: |
; CHECK-LABEL: name: test_memmove2_const
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
- ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
- ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
- ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
+ ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = G_CONSTANT i64 48
@@ -117,11 +119,42 @@ body: |
; CHECK-LABEL: name: test_memmove3_const_toolarge
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
- ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 48, align 4)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+ ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD3]](p0) :: (load (s128) from %ir.1 + 64, align 4)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+ ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 80, align 4)
+ ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 16, align 4)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD6]](p0) :: (store (s128) into %ir.0 + 32, align 4)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+ ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD7]](p0) :: (store (s128) into %ir.0 + 48, align 4)
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+ ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD4]](s128), [[PTR_ADD8]](p0) :: (store (s128) into %ir.0 + 64, align 4)
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+ ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store (s128) into %ir.0 + 80, align 4)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = G_CONSTANT i64 96
@@ -139,29 +172,30 @@ body: |
; CHECK-LABEL: name: test_memmove4_const_unaligned
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
- ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
- ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
- ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
- ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48)
- ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4)
- ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
- ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4)
- ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
- ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+ ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48)
+ ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+ ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%2:_(s64) = G_CONSTANT i64 52
@@ -179,23 +213,24 @@ body: |
; CHECK-LABEL: name: test_memmove_addrspace
; CHECK: liveins: $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
- ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
- ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
- ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
- ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
- ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
- ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
- ; CHECK: RET_ReallyLR
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
+ ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
+ ; CHECK-NEXT: RET_ReallyLR
%0:_(p1) = COPY $x0
%1:_(p2) = COPY $x1
%2:_(s64) = G_CONSTANT i64 48
diff --git a/llvm/test/CodeGen/AArch64/memmove-inline.ll b/llvm/test/CodeGen/AArch64/memmove-inline.ll
new file mode 100644
index 00000000000000..641c48dd0f1c54
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/memmove-inline.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ALIGNED
+; RUN: llc -mtriple=aarch64 -mattr=+strict-align < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNALIGNED
+
+; Small (16 bytes here) unaligned memmove() should be a function call if
+; strict-alignment is turned on.
+define void @t16(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: t16:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldr q0, [x1]
+; CHECK-ALIGNED-NEXT: str q0, [x0]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t16:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-UNALIGNED-NEXT: .cfi_def_cfa_offset 16
+; CHECK-UNALIGNED-NEXT: .cfi_offset w30, -16
+; CHECK-UNALIGNED-NEXT: mov w2, #16 // =0x10
+; CHECK-UNALIGNED-NEXT: bl memmove
+; CHECK-UNALIGNED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
+ ret void
+}
+
+; Small (16 bytes here) aligned memmove() should be inlined even if
+; strict-alignment is turned on.
+define void @t16_aligned(ptr align 8 %out, ptr align 8 %in) {
+; CHECK-ALIGNED-LABEL: t16_aligned:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldr q0, [x1]
+; CHECK-ALIGNED-NEXT: str q0, [x0]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t16_aligned:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: ldp x9, x8, [x1]
+; CHECK-UNALIGNED-NEXT: stp x9, x8, [x0]
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
+ ret void
+}
+
+; Tiny (4 bytes here) unaligned memmove() should be inlined with byte-sized
+; loads and stores if strict-alignment is turned on.
+define void @t4(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: t4:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldr w8, [x1]
+; CHECK-ALIGNED-NEXT: str w8, [x0]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t4:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: ldrb w8, [x1, #3]
+; CHECK-UNALIGNED-NEXT: ldrb w9, [x1, #2]
+; CHECK-UNALIGNED-NEXT: ldrb w10, [x1]
+; CHECK-UNALIGNED-NEXT: ldrb w11, [x1, #1]
+; CHECK-UNALIGNED-NEXT: strb w8, [x0, #3]
+; CHECK-UNALIGNED-NEXT: strb w9, [x0, #2]
+; CHECK-UNALIGNED-NEXT: strb w11, [x0, #1]
+; CHECK-UNALIGNED-NEXT: strb w10, [x0]
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
+ ret void
+}
+
+define void @t256(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: t256:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldp q0, q1, [x1]
+; CHECK-ALIGNED-NEXT: ldp q2, q3, [x1, #32]
+; CHECK-ALIGNED-NEXT: ldp q4, q5, [x1, #64]
+; CHECK-ALIGNED-NEXT: ldp q6, q7, [x1, #96]
+; CHECK-ALIGNED-NEXT: ldp q16, q17, [x1, #224]
+; CHECK-ALIGNED-NEXT: ldp q18, q19, [x1, #128]
+; CHECK-ALIGNED-NEXT: ldp q20, q21, [x1, #160]
+; CHECK-ALIGNED-NEXT: ldp q22, q23, [x1, #192]
+; CHECK-ALIGNED-NEXT: stp q0, q1, [x0]
+; CHECK-ALIGNED-NEXT: stp q2, q3, [x0, #32]
+; CHECK-ALIGNED-NEXT: stp q4, q5, [x0, #64]
+; CHECK-ALIGNED-NEXT: stp q6, q7, [x0, #96]
+; CHECK-ALIGNED-NEXT: stp q18, q19, [x0, #128]
+; CHECK-ALIGNED-NEXT: stp q20, q21, [x0, #160]
+; CHECK-ALIGNED-NEXT: stp q22, q23, [x0, #192]
+; CHECK-ALIGNED-NEXT: stp q16, q17, [x0, #224]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t256:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-UNALIGNED-NEXT: .cfi_def_cfa_offset 16
+; CHECK-UNALIGNED-NEXT: .cfi_offset w30, -16
+; CHECK-UNALIGNED-NEXT: mov w2, #256 // =0x100
+; CHECK-UNALIGNED-NEXT: bl memmove
+; CHECK-UNALIGNED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 256, i1 false)
+ ret void
+}
+
+define void @t257(ptr %out, ptr %in) {
+; CHECK-LABEL: t257:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w2, #257 // =0x101
+; CHECK-NEXT: bl memmove
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 257, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)