[llvm] [GlobalISel] Allow Legalizer to lower volatile memcpy family. (PR #145997)

Pete Chou via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 15 11:25:02 PDT 2025


https://github.com/petechou updated https://github.com/llvm/llvm-project/pull/145997

>From 936aa775554b0f1de8d491818f785f747069d723 Mon Sep 17 00:00:00 2001
From: Pete Chou <petechou at gmail.com>
Date: Thu, 26 Jun 2025 15:45:00 -0700
Subject: [PATCH 1/2] [GlobalISel] Allow Legalizer to lower volatile memcpy
 family.

This change updates legalizer to allow lowering volatile memcpy family
as a target might rely on lowering to legalize them. Also, legalizer
already lowers volatile G_MEMCPY_INLINE and has the capability to lower
memcpy family. For targets like aarch64 use legalizer to lower memcpy
family as a combiner optimization, the change adds an additional
argument for lowering to skip volatile and keep the existing behavior in
that case.
---
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |  3 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  2 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  7 +++--
 .../AMDGPU/GlobalISel/legalize-memcpy.mir     | 30 +++++++++++++++++++
 .../GlobalISel/legalize-memcpyinline.mir      | 30 +++++++++++++++++++
 .../AMDGPU/GlobalISel/legalize-memmove.mir    | 30 +++++++++++++++++++
 .../AMDGPU/GlobalISel/legalize-memset.mir     | 29 ++++++++++++++++++
 7 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index ea0873f41ebba..be8169c79f219 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -489,7 +489,8 @@ class LegalizerHelper {
   LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI);
   LLVM_ABI LegalizeResult lowerMemcpyInline(MachineInstr &MI);
   LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI,
-                                            unsigned MaxLen = 0);
+                                            unsigned MaxLen = 0,
+                                            bool SkipVolatile = false);
   LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI);
 };
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..9e203ce863639 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1704,7 +1704,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI,
   MachineIRBuilder HelperBuilder(MI);
   GISelObserverWrapper DummyObserver;
   LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
-  return Helper.lowerMemCpyFamily(MI, MaxLen) ==
+  return Helper.lowerMemCpyFamily(MI, MaxLen, /*SkipVolatile=*/true) ==
          LegalizerHelper::LegalizeResult::Legalized;
 }
 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b87b029d01632..b5af3c64d50fa 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -10066,7 +10066,8 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
 }
 
 LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen,
+                                   bool SkipVolatile) {
   const unsigned Opc = MI.getOpcode();
   // This combine is fairly complex so it's not written with a separate
   // matcher function.
@@ -10099,8 +10100,8 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
   }
 
   bool IsVolatile = MemOp->isVolatile();
-  // Don't try to optimize volatile.
-  if (IsVolatile)
+  // Don't try to optimize volatile when not allowed.
+  if (SkipVolatile && IsVolatile)
     return UnableToLegalize;
 
   if (MaxLen && KnownLen > MaxLen)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
index be3fe91407fdf..4f5f52b25cdf7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpy.mir
@@ -31,3 +31,33 @@ body:             |
     S_ENDPGM 0
 
 ...
+---
+name:            memcpy_test_volatile
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: memcpy_test_volatile
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (volatile load (s8))
+    ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (volatile store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+    %3:_(s32) = COPY $vgpr2
+    %4:_(s32) = COPY $vgpr3
+    %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32)
+    %6:_(s32) = G_CONSTANT i32 1
+    %7:_(s64) = G_ZEXT %6:_(s32)
+    G_MEMCPY %2:_(p0), %5:_(p0), %7:_(s64), 0 :: (volatile store (s8)), (volatile load (s8))
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
index a82ca30209820..0392aef6fe030 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memcpyinline.mir
@@ -31,3 +31,33 @@ body:             |
     S_ENDPGM 0
 
 ...
+---
+name:            memcpyinline_test_volatile
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: memcpyinline_test_volatile
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (volatile load (s8))
+    ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (volatile store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+    %3:_(s32) = COPY $vgpr2
+    %4:_(s32) = COPY $vgpr3
+    %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32)
+    %6:_(s32) = G_CONSTANT i32 1
+    %7:_(s64) = G_ZEXT %6:_(s32)
+    G_MEMCPY_INLINE %2:_(p0), %5:_(p0), %7:_(s64) :: (volatile store (s8)), (volatile load (s8))
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
index e7cfaab135beb..1f8d1aac24ebb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memmove.mir
@@ -31,3 +31,33 @@ body:             |
     S_ENDPGM 0
 
 ...
+---
+name:            memmove_test_volatile
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: memmove_test_volatile
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV1]](p0) :: (volatile load (s8))
+    ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV]](p0) :: (volatile store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+    %3:_(s32) = COPY $vgpr2
+    %4:_(s32) = COPY $vgpr3
+    %5:_(p0) = G_MERGE_VALUES %3:_(s32), %4:_(s32)
+    %6:_(s32) = G_CONSTANT i32 1
+    %7:_(s64) = G_ZEXT %6:_(s32)
+    G_MEMMOVE %2:_(p0), %5:_(p0), %7:_(s64), 0 :: (volatile store (s8)), (volatile load (s8))
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
index 021cebbb6cb49..dda94e1550585 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-memset.mir
@@ -30,3 +30,32 @@ body:             |
     S_ENDPGM 0
 
 ...
+---
+name:            memset_test_volatile
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: memset_test_volatile
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[MV]](p0) :: (volatile store (s8))
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(p0) = G_MERGE_VALUES %0:_(s32), %1:_(s32)
+    %3:_(s32) = COPY $vgpr2
+    %4:_(s16) = G_TRUNC %3:_(s32)
+    %5:_(s8) = G_TRUNC %4:_(s16)
+    %6:_(s32) = G_CONSTANT i32 1
+    %7:_(s64) = G_ZEXT %6:_(s32)
+    G_MEMSET %2:_(p0), %5:_(s8), %7:_(s64), 0 :: (volatile store (s8))
+    S_ENDPGM 0
+
+...

>From 13f1a7bcd55ec1780d631eb1b906613aa82eb190 Mon Sep 17 00:00:00 2001
From: Pete Chou <petechou at gmail.com>
Date: Tue, 15 Jul 2025 09:48:59 -0700
Subject: [PATCH 2/2] address review comment

---
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |   3 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |   2 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   9 +-
 llvm/test/CodeGen/AArch64/aarch64-mops.ll     | 188 +++++++-----------
 4 files changed, 71 insertions(+), 131 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index be8169c79f219..ea0873f41ebba 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -489,8 +489,7 @@ class LegalizerHelper {
   LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI);
   LLVM_ABI LegalizeResult lowerMemcpyInline(MachineInstr &MI);
   LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI,
-                                            unsigned MaxLen = 0,
-                                            bool SkipVolatile = false);
+                                            unsigned MaxLen = 0);
   LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI);
 };
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9e203ce863639..b1e851183de0d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1704,7 +1704,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI,
   MachineIRBuilder HelperBuilder(MI);
   GISelObserverWrapper DummyObserver;
   LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
-  return Helper.lowerMemCpyFamily(MI, MaxLen, /*SkipVolatile=*/true) ==
+  return Helper.lowerMemCpyFamily(MI, MaxLen) ==
          LegalizerHelper::LegalizeResult::Legalized;
 }
 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b5af3c64d50fa..a68f6542760cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -10066,8 +10066,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
 }
 
 LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen,
-                                   bool SkipVolatile) {
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
   const unsigned Opc = MI.getOpcode();
   // This combine is fairly complex so it's not written with a separate
   // matcher function.
@@ -10099,14 +10098,10 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen,
     return Legalized;
   }
 
-  bool IsVolatile = MemOp->isVolatile();
-  // Don't try to optimize volatile when not allowed.
-  if (SkipVolatile && IsVolatile)
-    return UnableToLegalize;
-
   if (MaxLen && KnownLen > MaxLen)
     return UnableToLegalize;
 
+  bool IsVolatile = MemOp->isVolatile();
   if (Opc == TargetOpcode::G_MEMCPY) {
     auto &MF = *MI.getParent()->getParent();
     const auto &TLI = *MF.getSubtarget().getTargetLowering();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index ff7872c922e32..83530049a50d6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -87,46 +87,17 @@ entry:
 }
 
 define void @memset_10_zeroval_volatile(ptr %dst) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w1, wzr
-; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
-; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O0-NEXT:    ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT:    mov w1, wzr
-; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
-; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O3-NEXT:    ret
-;
-; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O0:       // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT:    mov x9, xzr
-; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT:    ret
+; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    str xzr, [x0]
+; GISel-WITHOUT-MOPS-NEXT:    strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT:    ret
 ;
-; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O3:       // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT:    ret
+; GISel-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    str xzr, [x0]
+; GISel-MOPS-NEXT:    strh wzr, [x0, #8]
+; GISel-MOPS-NEXT:    ret
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile:
 ; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
@@ -490,43 +461,46 @@ entry:
 define void @memset_10_volatile(ptr %dst, i32 %value) {
 ; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_volatile:
 ; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT:    bl memset
-; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    // implicit-def: $x8
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, w1
+; GISel-WITHOUT-MOPS-O0-NEXT:    and x8, x8, #0xff
+; GISel-WITHOUT-MOPS-O0-NEXT:    mov x9, #72340172838076673 // =0x101010101010101
+; GISel-WITHOUT-MOPS-O0-NEXT:    mul x8, x8, x9
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT:    // kill: def $w8 killed $w8 killed $x8
+; GISel-WITHOUT-MOPS-O0-NEXT:    strh w8, [x0, #8]
 ; GISel-WITHOUT-MOPS-O0-NEXT:    ret
 ;
 ; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_volatile:
 ; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT:    bl memset
-; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GISel-WITHOUT-MOPS-O3-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; GISel-WITHOUT-MOPS-O3-NEXT:    and x9, x1, #0xff
+; GISel-WITHOUT-MOPS-O3-NEXT:    mul x8, x9, x8
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT:    strh w8, [x0, #8]
 ; GISel-WITHOUT-MOPS-O3-NEXT:    ret
 ;
 ; GISel-MOPS-O0-LABEL: memset_10_volatile:
 ; GISel-MOPS-O0:       // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT:    // implicit-def: $x9
-; GISel-MOPS-O0-NEXT:    mov w9, w1
-; GISel-MOPS-O0-NEXT:    setp [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT:    setm [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT:    sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT:    // implicit-def: $x8
+; GISel-MOPS-O0-NEXT:    mov w8, w1
+; GISel-MOPS-O0-NEXT:    and x8, x8, #0xff
+; GISel-MOPS-O0-NEXT:    mov x9, #72340172838076673 // =0x101010101010101
+; GISel-MOPS-O0-NEXT:    mul x8, x8, x9
+; GISel-MOPS-O0-NEXT:    str x8, [x0]
+; GISel-MOPS-O0-NEXT:    // kill: def $w8 killed $w8 killed $x8
+; GISel-MOPS-O0-NEXT:    strh w8, [x0, #8]
 ; GISel-MOPS-O0-NEXT:    ret
 ;
 ; GISel-MOPS-O3-LABEL: memset_10_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT:    mov w8, #10 // =0xa
 ; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
-; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
-; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
-; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; GISel-MOPS-O3-NEXT:    and x9, x1, #0xff
+; GISel-MOPS-O3-NEXT:    mul x8, x9, x8
+; GISel-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-MOPS-O3-NEXT:    strh w8, [x0, #8]
 ; GISel-MOPS-O3-NEXT:    ret
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile:
@@ -905,43 +879,21 @@ entry:
 }
 
 define void @memcpy_10_volatile(ptr %dst, ptr %src, i32 %value) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_10_volatile:
-; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT:    bl memcpy
-; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O0-NEXT:    ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_10_volatile:
-; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT:    bl memcpy
-; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O3-NEXT:    ret
-;
-; GISel-MOPS-O0-LABEL: memcpy_10_volatile:
-; GISel-MOPS-O0:       // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT:    cpyfp [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT:    cpyfm [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT:    cpyfe [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT:    ret
+; GISel-WITHOUT-MOPS-LABEL: memcpy_10_volatile:
+; GISel-WITHOUT-MOPS:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT:    ldr x8, [x1]
+; GISel-WITHOUT-MOPS-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT:    ret
 ;
-; GISel-MOPS-O3-LABEL: memcpy_10_volatile:
-; GISel-MOPS-O3:       // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT:    cpyfp [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT:    cpyfm [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT:    cpyfe [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT:    ret
+; GISel-MOPS-LABEL: memcpy_10_volatile:
+; GISel-MOPS:       // %bb.0: // %entry
+; GISel-MOPS-NEXT:    ldr x8, [x1]
+; GISel-MOPS-NEXT:    str x8, [x0]
+; GISel-MOPS-NEXT:    ldrh w8, [x1, #8]
+; GISel-MOPS-NEXT:    strh w8, [x0, #8]
+; GISel-MOPS-NEXT:    ret
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10_volatile:
 ; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
@@ -1736,40 +1688,34 @@ entry:
 define void @memmove_10_volatile(ptr %dst, ptr %src, i32 %value) {
 ; GISel-WITHOUT-MOPS-O0-LABEL: memmove_10_volatile:
 ; GISel-WITHOUT-MOPS-O0:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT:    mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT:    bl memmove
-; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldr x9, [x1]
+; GISel-WITHOUT-MOPS-O0-NEXT:    ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT:    str x9, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT:    strh w8, [x0, #8]
 ; GISel-WITHOUT-MOPS-O0-NEXT:    ret
 ;
 ; GISel-WITHOUT-MOPS-O3-LABEL: memmove_10_volatile:
 ; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT:    .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT:    mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT:    bl memmove
-; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldr x8, [x1]
+; GISel-WITHOUT-MOPS-O3-NEXT:    ldrh w9, [x1, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT:    strh w9, [x0, #8]
 ; GISel-WITHOUT-MOPS-O3-NEXT:    ret
 ;
 ; GISel-MOPS-O0-LABEL: memmove_10_volatile:
 ; GISel-MOPS-O0:       // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT:    // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT:    cpyp [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT:    cpym [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT:    ldr x9, [x1]
+; GISel-MOPS-O0-NEXT:    ldrh w8, [x1, #8]
+; GISel-MOPS-O0-NEXT:    str x9, [x0]
+; GISel-MOPS-O0-NEXT:    strh w8, [x0, #8]
 ; GISel-MOPS-O0-NEXT:    ret
 ;
 ; GISel-MOPS-O3-LABEL: memmove_10_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT:    mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT:    cpyp [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT:    cpym [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT:    cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT:    ldr x8, [x1]
+; GISel-MOPS-O3-NEXT:    ldrh w9, [x1, #8]
+; GISel-MOPS-O3-NEXT:    str x8, [x0]
+; GISel-MOPS-O3-NEXT:    strh w9, [x0, #8]
 ; GISel-MOPS-O3-NEXT:    ret
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10_volatile:



More information about the llvm-commits mailing list