[llvm] [GlobalISel] Allow Legalizer to lower volatile memcpy family. (PR #145997)
Pete Chou via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 11:25:02 PDT 2025
https://github.com/petechou updated https://github.com/llvm/llvm-project/pull/145997
>From 936aa775554b0f1de8d491818f785f747069d723 Mon Sep 17 00:00:00 2001
From: Pete Chou <petechou at gmail.com>
Date: Thu, 26 Jun 2025 15:45:00 -0700
Subject: [PATCH 1/2] [GlobalISel] Allow Legalizer to lower volatile memcpy
family.
This change updates the legalizer to allow lowering the volatile memcpy
family, as a target might rely on lowering to legalize them. Also, the
legalizer already lowers volatile G_MEMCPY_INLINE and has the capability to
lower the memcpy family. For targets like AArch64 that use the legalizer to
lower the memcpy family as a combiner optimization, the change adds an
additional argument for lowering to skip volatile and keep the existing
behavior in that case.
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 3 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 +-
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 7 +++--
.../AMDGPU/GlobalISel/legalize-memcpy.mir | 30 +++++++++++++++++++
.../GlobalISel/legalize-memcpyinline.mir | 30 +++++++++++++++++++
.../AMDGPU/GlobalISel/legalize-memmove.mir | 30 +++++++++++++++++++
.../AMDGPU/GlobalISel/legalize-memset.mir | 29 ++++++++++++++++++
7 files changed, 126 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index ea0873f41ebba..be8169c79f219 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -489,7 +489,8 @@ class LegalizerHelper {
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerMemcpyInline(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI,
- unsigned MaxLen = 0);
+ unsigned MaxLen = 0,
+ bool SkipVolatile = false);
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI);
};
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..9e203ce863639 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1704,7 +1704,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI,
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
- return Helper.lowerMemCpyFamily(MI, MaxLen) ==
+ return Helper.lowerMemCpyFamily(MI, MaxLen, /*SkipVolatile=*/true) ==
LegalizerHelper::LegalizeResult::Legalized;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b87b029d01632..b5af3c64d50fa 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -10066,7 +10066,8 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen,
+ bool SkipVolatile) {
const unsigned Opc = MI.getOpcode();
// This combine is fairly complex so it's not written with a separate
// matcher function.
@@ -10099,8 +10100,8 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
}
bool IsVolatile = MemOp->isVolatile();
- // Don't try to optimize volatile.
- if (IsVolatile)
+ // Don't try to optimize volatile when not allowed.
+ if (SkipVolatile && IsVolatile)
return UnableToLegalize;
if (MaxLen && KnownLen > MaxLen)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
index be3fe91407fdf..4f5f52b25cdf7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
@@ -31,3 +31,33 @@ body: |
S_ENDPGM 0
...
+---
+name: memcpy_test_volatile
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; CHECK-LABEL: name: memcpy_test_volatile
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (volatile load (s8))
+ ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (volatile store (s8))
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+ %3:_(s32) = COPY $vgpr2
+ %4:_(s32) = COPY $vgpr3
+ %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32)
+ %6:_(s32) = G_CONSTANT i32 1
+ %7:_(s64) = G_ZEXT %6:_(s32)
+ G_MEMCPY %2:_(p0), %5:_(p0), %7:_(s64), 0 :: (volatile store (s8)), (volatile load (s8))
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
index a82ca30209820..0392aef6fe030 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
@@ -31,3 +31,33 @@ body: |
S_ENDPGM 0
...
+---
+name: memcpyinline_test_volatile
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; CHECK-LABEL: name: memcpyinline_test_volatile
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (volatile load (s8))
+ ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (volatile store (s8))
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+ %3:_(s32) = COPY $vgpr2
+ %4:_(s32) = COPY $vgpr3
+ %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32)
+ %6:_(s32) = G_CONSTANT i32 1
+ %7:_(s64) = G_ZEXT %6:_(s32)
+ G_MEMCPY_INLINE %2:_(p0), %5:_(p0), %7:_(s64) :: (volatile store (s8)), (volatile load (s8))
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
index e7cfaab135beb..1f8d1aac24ebb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
@@ -31,3 +31,33 @@ body: |
S_ENDPGM 0
...
+---
+name: memmove_test_volatile
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; CHECK-LABEL: name: memmove_test_volatile
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (volatile load (s8))
+ ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (volatile store (s8))
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+ %3:_(s32) = COPY $vgpr2
+ %4:_(s32) = COPY $vgpr3
+ %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32)
+ %6:_(s32) = G_CONSTANT i32 1
+ %7:_(s64) = G_ZEXT %6:_(s32)
+ G_MEMMOVE %2:_(p0), %5:_(p0), %7:_(s64), 0 :: (volatile store (s8)), (volatile load (s8))
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
index 021cebbb6cb49..dda94e1550585 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
@@ -30,3 +30,32 @@ body: |
S_ENDPGM 0
...
+---
+name: memset_test_volatile
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: memset_test_volatile
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[MV]](p0) :: (volatile store (s8))
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+ %3:_(s32) = COPY $vgpr2
+ %4:_(s16) = G_TRUNC %3:_(s32)
+ %5:_(s8) = G_TRUNC %4:_(s16)
+ %6:_(s32) = G_CONSTANT i32 1
+ %7:_(s64) = G_ZEXT %6:_(s32)
+ G_MEMSET %2:_(p0), %5:_(s8), %7:_(s64), 0 :: (volatile store (s8))
+ S_ENDPGM 0
+
+...
>From 13f1a7bcd55ec1780d631eb1b906613aa82eb190 Mon Sep 17 00:00:00 2001
From: Pete Chou <petechou at gmail.com>
Date: Tue, 15 Jul 2025 09:48:59 -0700
Subject: [PATCH 2/2] address review comment
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 3 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 +-
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 9 +-
llvm/test/CodeGen/AArch64/aarch64-mops.ll | 188 +++++++-----------
4 files changed, 71 insertions(+), 131 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index be8169c79f219..ea0873f41ebba 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -489,8 +489,7 @@ class LegalizerHelper {
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerMemcpyInline(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI,
- unsigned MaxLen = 0,
- bool SkipVolatile = false);
+ unsigned MaxLen = 0);
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI);
};
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9e203ce863639..b1e851183de0d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1704,7 +1704,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI,
MachineIRBuilder HelperBuilder(MI);
GISelObserverWrapper DummyObserver;
LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
- return Helper.lowerMemCpyFamily(MI, MaxLen, /*SkipVolatile=*/true) ==
+ return Helper.lowerMemCpyFamily(MI, MaxLen) ==
LegalizerHelper::LegalizeResult::Legalized;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b5af3c64d50fa..a68f6542760cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -10066,8 +10066,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen,
- bool SkipVolatile) {
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
const unsigned Opc = MI.getOpcode();
// This combine is fairly complex so it's not written with a separate
// matcher function.
@@ -10099,14 +10098,10 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen,
return Legalized;
}
- bool IsVolatile = MemOp->isVolatile();
- // Don't try to optimize volatile when not allowed.
- if (SkipVolatile && IsVolatile)
- return UnableToLegalize;
-
if (MaxLen && KnownLen > MaxLen)
return UnableToLegalize;
+ bool IsVolatile = MemOp->isVolatile();
if (Opc == TargetOpcode::G_MEMCPY) {
auto &MF = *MI.getParent()->getParent();
const auto &TLI = *MF.getSubtarget().getTargetLowering();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index ff7872c922e32..83530049a50d6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -87,46 +87,17 @@ entry:
}
define void @memset_10_zeroval_volatile(ptr %dst) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, wzr
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w1, wzr
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: mov x9, xzr
-; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: setm [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: sete [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: str xzr, [x0]
+; GISel-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
@@ -490,43 +461,46 @@ entry:
define void @memset_10_volatile(ptr %dst, i32 %value) {
; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_volatile:
; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT: // implicit-def: $x8
+; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, w1
+; GISel-WITHOUT-MOPS-O0-NEXT: and x8, x8, #0xff
+; GISel-WITHOUT-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101
+; GISel-WITHOUT-MOPS-O0-NEXT: mul x8, x8, x9
+; GISel-WITHOUT-MOPS-O0-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8
+; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-WITHOUT-MOPS-O0-NEXT: ret
;
; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_volatile:
; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1
+; GISel-WITHOUT-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; GISel-WITHOUT-MOPS-O3-NEXT: and x9, x1, #0xff
+; GISel-WITHOUT-MOPS-O3-NEXT: mul x8, x9, x8
+; GISel-WITHOUT-MOPS-O3-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT: strh w8, [x0, #8]
; GISel-WITHOUT-MOPS-O3-NEXT: ret
;
; GISel-MOPS-O0-LABEL: memset_10_volatile:
; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: // implicit-def: $x9
-; GISel-MOPS-O0-NEXT: mov w9, w1
-; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: setm [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT: // implicit-def: $x8
+; GISel-MOPS-O0-NEXT: mov w8, w1
+; GISel-MOPS-O0-NEXT: and x8, x8, #0xff
+; GISel-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101
+; GISel-MOPS-O0-NEXT: mul x8, x8, x9
+; GISel-MOPS-O0-NEXT: str x8, [x0]
+; GISel-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8
+; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-MOPS-O0-NEXT: ret
;
; GISel-MOPS-O3-LABEL: memset_10_volatile:
; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
; GISel-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1
-; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, x1
-; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1
-; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; GISel-MOPS-O3-NEXT: and x9, x1, #0xff
+; GISel-MOPS-O3-NEXT: mul x8, x9, x8
+; GISel-MOPS-O3-NEXT: str x8, [x0]
+; GISel-MOPS-O3-NEXT: strh w8, [x0, #8]
; GISel-MOPS-O3-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile:
@@ -905,43 +879,21 @@ entry:
}
define void @memcpy_10_volatile(ptr %dst, ptr %src, i32 %value) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_10_volatile:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memcpy
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_10_volatile:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memcpy
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memcpy_10_volatile:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: cpyfp [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpyfm [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpyfe [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memcpy_10_volatile:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1]
+; GISel-WITHOUT-MOPS-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memcpy_10_volatile:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT: cpyfp [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memcpy_10_volatile:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: ldr x8, [x1]
+; GISel-MOPS-NEXT: str x8, [x0]
+; GISel-MOPS-NEXT: ldrh w8, [x1, #8]
+; GISel-MOPS-NEXT: strh w8, [x0, #8]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10_volatile:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
@@ -1736,40 +1688,34 @@ entry:
define void @memmove_10_volatile(ptr %dst, ptr %src, i32 %value) {
; GISel-WITHOUT-MOPS-O0-LABEL: memmove_10_volatile:
; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memmove
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT: ldr x9, [x1]
+; GISel-WITHOUT-MOPS-O0-NEXT: ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT: str x9, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-WITHOUT-MOPS-O0-NEXT: ret
;
; GISel-WITHOUT-MOPS-O3-LABEL: memmove_10_volatile:
; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memmove
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT: ldr x8, [x1]
+; GISel-WITHOUT-MOPS-O3-NEXT: ldrh w9, [x1, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT: strh w9, [x0, #8]
; GISel-WITHOUT-MOPS-O3-NEXT: ret
;
; GISel-MOPS-O0-LABEL: memmove_10_volatile:
; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: cpyp [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpym [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT: ldr x9, [x1]
+; GISel-MOPS-O0-NEXT: ldrh w8, [x1, #8]
+; GISel-MOPS-O0-NEXT: str x9, [x0]
+; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-MOPS-O0-NEXT: ret
;
; GISel-MOPS-O3-LABEL: memmove_10_volatile:
; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT: cpyp [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT: ldr x8, [x1]
+; GISel-MOPS-O3-NEXT: ldrh w9, [x1, #8]
+; GISel-MOPS-O3-NEXT: str x8, [x0]
+; GISel-MOPS-O3-NEXT: strh w9, [x0, #8]
; GISel-MOPS-O3-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10_volatile:
More information about the llvm-commits
mailing list