[clang] [llvm] [SelectionDAG][PowerPC] Add an intrinsic for memcmp. (PR #118178)

Stefan Pintilie via cfe-commits cfe-commits at lists.llvm.org
Sat Nov 30 08:13:05 PST 2024


https://github.com/stefanp-ibm created https://github.com/llvm/llvm-project/pull/118178

In the backend we want to provide special handling for the function memcmp. This patch adds an intrinsic so that the backend will recognize it as more than just a regular function call.

This patch also adds special handling for PowerPC on AIX.

>From 11868cec3a03be41dbf0201d95bfd2a5416a0ca5 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Fri, 29 Nov 2024 15:46:46 -0500
Subject: [PATCH] [SelectionDAG][PowerPC] Add an intrinsic for memcmp.

In the backend we want to provide special handling for the function
memcmp. This patch adds an intrinsic so that the backend will recognize
it as more than just a regular function call.

This patch also adds special handling for PowerPC on AIX.
---
 clang/lib/CodeGen/CGBuiltin.cpp               |  13 ++
 clang/test/CodeGen/builtin-memfns.c           |  53 +++---
 clang/test/CodeGen/debug-info-extern-call.c   |  10 +-
 llvm/include/llvm/CodeGen/SelectionDAG.h      |   6 +
 llvm/include/llvm/IR/Intrinsics.td            |   6 +
 llvm/include/llvm/IR/RuntimeLibcalls.def      |   1 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  51 ++++++
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  20 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   1 +
 llvm/test/CodeGen/PowerPC/memintr.ll          | 151 ++++++++++++++++++
 10 files changed, 288 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/memintr.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cb9c23b8e0a0d0..0688f4064d788b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4483,6 +4483,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(nullptr);
   }
 
+  case Builtin::BImemcmp: {
+    Address Src = EmitPointerWithAlignment(E->getArg(0));
+    Address Dst = EmitPointerWithAlignment(E->getArg(1));
+    Value *SizeVal = EmitScalarExpr(E->getArg(2));
+    llvm::Type *Tys[] = {Dst.getBasePointer()->getType(),
+                         Src.getBasePointer()->getType(), SizeVal->getType()};
+
+    Function *F = CGM.getIntrinsic(Intrinsic::memcmp, Tys);
+    Value *Mem1Value = EmitScalarExpr(E->getArg(0));
+    Value *Mem2Value = EmitScalarExpr(E->getArg(1));
+    Value *Args[] = {Mem1Value, Mem2Value, SizeVal};
+    return RValue::get(Builder.CreateCall(F, Args));
+  }
   case Builtin::BImemcpy:
   case Builtin::BI__builtin_memcpy:
   case Builtin::BImempcpy:
diff --git a/clang/test/CodeGen/builtin-memfns.c b/clang/test/CodeGen/builtin-memfns.c
index 23c3c60b779b37..51c5ad43cc7ef2 100644
--- a/clang/test/CodeGen/builtin-memfns.c
+++ b/clang/test/CodeGen/builtin-memfns.c
@@ -1,16 +1,23 @@
-// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm < %s| FileCheck %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK32
+// RUN: %clang_cc1 -triple ppc -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK32
+// RUN: %clang_cc1 -triple ppc64 -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK64
 
 typedef __WCHAR_TYPE__ wchar_t;
 typedef __SIZE_TYPE__ size_t;
 
 void *memcpy(void *, void const *, size_t);
 void *memccpy(void *, void const *, int, size_t);
-
-// CHECK: @test1
-// CHECK: call void @llvm.memset.p0.i32
-// CHECK: call void @llvm.memset.p0.i32
-// CHECK: call void @llvm.memcpy.p0.p0.i32
-// CHECK: call void @llvm.memmove.p0.p0.i32
+int memcmp(const void *, const void *, size_t);
+
+// CHECK-LABEL: @test1
+// CHECK32: call void @llvm.memset.p0.i32
+// CHECK64: call void @llvm.memset.p0.i64
+// CHECK32: call void @llvm.memset.p0.i32
+// CHECK64: call void @llvm.memset.p0.i64
+// CHECK32: call void @llvm.memcpy.p0.p0.i32
+// CHECK64: call void @llvm.memcpy.p0.p0.i64
+// CHECK32: call void @llvm.memmove.p0.p0.i32
+// CHECK64: call void @llvm.memmove.p0.p0.i64
 // CHECK-NOT: __builtin
 // CHECK: ret
 int test1(int argc, char **argv) {
@@ -23,37 +30,38 @@ int test1(int argc, char **argv) {
   return 0;
 }
 
-// CHECK: @test2
-// CHECK: call void @llvm.memcpy.p0.p0.i32
+// CHECK-LABEL: @test2
+// CHECK32: call void @llvm.memcpy.p0.p0.i32
+// CHECK64: call void @llvm.memcpy.p0.p0.i64
 char* test2(char* a, char* b) {
   return __builtin_memcpy(a, b, 4);
 }
 
-// CHECK: @test3
+// CHECK-LABEL: @test3
 // CHECK: call void @llvm.memset
 void test3(char *P) {
   __builtin___memset_chk(P, 42, 128, 128);
 }
 
-// CHECK: @test4
+// CHECK-LABEL: @test4
 // CHECK: call void @llvm.memcpy
 void test4(char *P, char *Q) {
   __builtin___memcpy_chk(P, Q, 128, 128);
 }
 
-// CHECK: @test5
+// CHECK-LABEL: @test5
 // CHECK: call void @llvm.memmove
 void test5(char *P, char *Q) {
   __builtin___memmove_chk(P, Q, 128, 128);
 }
 
-// CHECK: @test6
+// CHECK-LABEL: @test6
 // CHECK: call void @llvm.memcpy
 int test6(char *X) {
   return __builtin___memcpy_chk(X, X, 42, 42) != 0;
 }
 
-// CHECK: @test7
+// CHECK-LABEL: @test7
 // PR12094
 int test7(int *p) {
   struct snd_pcm_hw_params_t* hwparams;  // incomplete type.
@@ -75,14 +83,14 @@ struct PS {
 } __attribute__((packed));
 struct PS ps;
 void test8(int *arg) {
-  // CHECK: @test8
+  // CHECK-LABEL: @test8
   // CHECK: call void @llvm.memcpy{{.*}} align 4 {{.*}} align 1 {{.*}} 16, i1 false)
   __builtin_memcpy(arg, ps.modes, sizeof(struct PS));
 }
 
 __attribute((aligned(16))) int x[4], y[4];
 void test9(void) {
-  // CHECK: @test9
+  // CHECK-LABEL: @test9
   // CHECK: call void @llvm.memcpy{{.*}} align 16 {{.*}} align 16 {{.*}} 16, i1 false)
   __builtin_memcpy(x, y, sizeof(y));
 }
@@ -93,10 +101,12 @@ wchar_t src;
 // CHECK-LABEL: @test10
 // FIXME: Consider lowering these to llvm.memcpy / llvm.memmove.
 void test10(void) {
-  // CHECK: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK32: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK64: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i64 noundef 4)
   __builtin_wmemcpy(&dest, &src, 4);
 
-  // CHECK: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK32: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK64: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i64 noundef 4)
   __builtin_wmemmove(&dest, &src, 4);
 }
 
@@ -122,3 +132,10 @@ void test13(char *d, char *s, int c, size_t n) {
   // CHECK: call ptr @memccpy
   memccpy(d, s, c, n);
 }
+
+// CHECK-LABEL: @test14
+int test14(const void * ptr1, const void * ptr2, size_t num) {
+  // CHECK32: call i32 @llvm.memcmp.p0.p0.i32
+  // CHECK64: call i32 @llvm.memcmp.p0.p0.i64
+  return memcmp(ptr1, ptr2, num);
+}
diff --git a/clang/test/CodeGen/debug-info-extern-call.c b/clang/test/CodeGen/debug-info-extern-call.c
index 0d18dc436040fa..10bd64baebb377 100644
--- a/clang/test/CodeGen/debug-info-extern-call.c
+++ b/clang/test/CodeGen/debug-info-extern-call.c
@@ -29,22 +29,22 @@
 // DECLS-FOR-EXTERN: [[FN1_TYPES]] = !{[[X_TYPE:![0-9]+]],
 // DECLS-FOR-EXTERN: [[X_TYPE]] = !DIDerivedType(tag: DW_TAG_typedef, name: "x",
 // DECLS-FOR-EXTERN-SAME: baseType: [[INT_TYPE]])
-// DECLS-FOR-EXTERN: !DISubprogram(name: "memcmp"
+// DECLS-FOR-EXTERN: !DISubprogram(name: "strcmp"
 // DECLS-FOR-EXTERN: !DISubprogram(name: "__some_reserved_name"
 
 // NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "fn1"
-// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "memcmp"
+// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "strcmp"
 // NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "__some_reserved_name"
 
 typedef int x;
 extern x fn1(int a, int b);
-extern int memcmp(const void *s1, const void *s2, unsigned long n);
+extern int strcmp(const char *s1, const char *s2);
 extern void __some_reserved_name(void);
 
-int fn2 (int *src, int *dst) {
+int fn2 (char *src, char *dst) {
   int x = 4, y = 5;
   int res = fn1(x, y);
-  int res2 = memcmp(dst, src, res);
+  int res2 = strcmp(dst, src);
   __some_reserved_name();
   return res + res2;
 }
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 2e3507386df309..c5c68edaa28ea0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1190,6 +1190,12 @@ class SelectionDAG {
   /// stack arguments from being clobbered.
   SDValue getStackArgumentTokenFactor(SDValue Chain);
 
+  std::pair<SDValue, SDValue> getMemcmp(SDValue Chain, const SDLoc &dl,
+                                        SDValue Dst, SDValue Src, SDValue Size,
+                                        Align Alignment, bool isVol,
+                                        bool AlwaysInline, const CallInst *CI,
+                                        std::optional<bool> OverrideTailCall);
+
   /* \p CI if not null is the memset call being lowered.
    * \p OverrideTailCall is an optional parameter that can be used to override
    * the tail call optimization decision. */
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 1ca8c2565ab0b6..62c9071ddf0e59 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -967,6 +967,12 @@ def int_memcpy  : Intrinsic<[],
                              WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
                              ImmArg<ArgIndex<3>>]>;
 
+def int_memcmp
+    : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty],
+                [IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
+                 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
+                 ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
+
 // Memcpy semantic that is guaranteed to be inlined.
 // In particular this means that the generated code is not allowed to call any
 // external function.
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 13a27b58b9cd78..fa6dad96e59a68 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -498,6 +498,7 @@ HANDLE_LIBCALL(UO_F128, "__unordtf2")
 HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord")
 
 // Memory
+HANDLE_LIBCALL(MEMCMP, "memcmp")
 HANDLE_LIBCALL(MEMCPY, "memcpy")
 HANDLE_LIBCALL(MEMMOVE, "memmove")
 HANDLE_LIBCALL(MEMSET, "memset")
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 182529123ec6d8..b2e69586f62087 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8497,6 +8497,57 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
   }
 }
 
+std::pair<SDValue, SDValue>
+SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0,
+                        SDValue Mem1, SDValue Size, Align Alignment, bool isVol,
+                        bool AlwaysInline, const CallInst *CI,
+                        std::optional<bool> OverrideTailCall) {
+
+  // TODO: Add a special case for the situation where the size is known.
+  // TODO: Add hooks for target-specific code.
+  // TODO: Always inline is not yet supported.
+  assert(!AlwaysInline && "Always inline for memcmp is not yet supported.");
+
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = PointerType::getUnqual(*getContext());
+  Entry.Node = Mem0;
+  Args.push_back(Entry);
+  Entry.Node = Mem1;
+  Args.push_back(Entry);
+
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Node = Size;
+  Args.push_back(Entry);
+
+  // FIXME: pass in SDLoc
+  TargetLowering::CallLoweringInfo CLI(*this);
+  bool IsTailCall = false;
+  if (OverrideTailCall.has_value()) {
+    IsTailCall = *OverrideTailCall;
+  } else {
+    bool LowersToMemcmp =
+        TLI->getLibcallName(RTLIB::MEMCMP) == StringRef("memcmp");
+    bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI);
+    IsTailCall = CI && CI->isTailCall() &&
+                 isInTailCallPosition(*CI, getTarget(),
+                                      ReturnsFirstArg && LowersToMemcmp);
+  }
+
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCMP),
+                    Type::getInt32Ty(*getContext()),
+                    getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCMP),
+                                      TLI->getPointerTy(getDataLayout())),
+                    std::move(Args))
+      .setTailCall(IsTailCall);
+
+  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+  return CallResult;
+}
+
 SDValue SelectionDAG::getMemcpy(
     SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size,
     Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38a3e9b91052d..f0b2d610892120 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6527,6 +6527,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                             RegName, getValue(RegValue)));
     return;
   }
+  case Intrinsic::memcmp: {
+    const auto &CallI = cast<CallInst>(I);
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    SDValue Op3 = getValue(I.getArgOperand(2));
+
+    Align Mem0Align = CallI.getParamAlign(0).valueOrOne();
+    Align Mem1Align = CallI.getParamAlign(1).valueOrOne();
+    Align Alignment = std::min(Mem0Align, Mem1Align);
+    bool isVol = CallI.isVolatile();
+    SDValue Root = isVol ? getRoot() : getMemoryRoot();
+
+    std::pair<SDValue, SDValue> MC =
+        DAG.getMemcmp(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+                      /* AlwaysInline */ false, &I, std::nullopt);
+    setValue(&I, MC.first);
+    updateDAGForMaybeTailCall(MC.second);
+    return;
+  }
   case Intrinsic::memcpy: {
     const auto &MCI = cast<MemCpyInst>(I);
     SDValue Op1 = getValue(I.getArgOperand(0));
@@ -9351,7 +9370,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
     visitInlineAsm(I);
     return;
   }
-
   diagnoseDontCall(I);
 
   if (Function *F = I.getCalledFunction()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e917ef3f5e8c9a..500582c124eb27 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1442,6 +1442,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setLibcallName(RTLIB::FREXP_F128, "frexpf128");
 
   if (Subtarget.isAIXABI()) {
+    setLibcallName(RTLIB::MEMCMP, isPPC64 ? "___memcmp64" : "___memcmp");
     setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
     setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
     setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
diff --git a/llvm/test/CodeGen/PowerPC/memintr.ll b/llvm/test/CodeGen/PowerPC/memintr.ll
new file mode 100644
index 00000000000000..5fbe7234b06221
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/memintr.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-BE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+
+define noundef ptr @memcpy_test(ptr noundef returned writeonly %destination, ptr nocapture noundef readonly %source, i32 noundef %num) local_unnamed_addr {
+; CHECK-LE-P9-LABEL: memcpy_test:
+; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mflr r0
+; CHECK-LE-P9-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-LE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-LE-P9-NEXT:    .cfi_offset r30, -16
+; CHECK-LE-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-LE-P9-NEXT:    stdu r1, -48(r1)
+; CHECK-LE-P9-NEXT:    std r0, 64(r1)
+; CHECK-LE-P9-NEXT:    mr r30, r3
+; CHECK-LE-P9-NEXT:    bl memcpy
+; CHECK-LE-P9-NEXT:    nop
+; CHECK-LE-P9-NEXT:    mr r3, r30
+; CHECK-LE-P9-NEXT:    addi r1, r1, 48
+; CHECK-LE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-LE-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-LE-P9-NEXT:    mtlr r0
+; CHECK-LE-P9-NEXT:    blr
+;
+; CHECK-BE-P9-LABEL: memcpy_test:
+; CHECK-BE-P9:       # %bb.0: # %entry
+; CHECK-BE-P9-NEXT:    mflr r0
+; CHECK-BE-P9-NEXT:    stdu r1, -128(r1)
+; CHECK-BE-P9-NEXT:    std r0, 144(r1)
+; CHECK-BE-P9-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-BE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-BE-P9-NEXT:    .cfi_offset r30, -16
+; CHECK-BE-P9-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-BE-P9-NEXT:    mr r30, r3
+; CHECK-BE-P9-NEXT:    bl memcpy
+; CHECK-BE-P9-NEXT:    nop
+; CHECK-BE-P9-NEXT:    mr r3, r30
+; CHECK-BE-P9-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; CHECK-BE-P9-NEXT:    addi r1, r1, 128
+; CHECK-BE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-BE-P9-NEXT:    mtlr r0
+; CHECK-BE-P9-NEXT:    blr
+;
+; CHECK-AIX-64-P9-LABEL: memcpy_test:
+; CHECK-AIX-64-P9:       # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT:    mflr r0
+; CHECK-AIX-64-P9-NEXT:    stdu r1, -128(r1)
+; CHECK-AIX-64-P9-NEXT:    std r0, 144(r1)
+; CHECK-AIX-64-P9-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
+; CHECK-AIX-64-P9-NEXT:    mr r31, r3
+; CHECK-AIX-64-P9-NEXT:    bl .___memmove64[PR]
+; CHECK-AIX-64-P9-NEXT:    nop
+; CHECK-AIX-64-P9-NEXT:    mr r3, r31
+; CHECK-AIX-64-P9-NEXT:    ld r31, 120(r1) # 8-byte Folded Reload
+; CHECK-AIX-64-P9-NEXT:    addi r1, r1, 128
+; CHECK-AIX-64-P9-NEXT:    ld r0, 16(r1)
+; CHECK-AIX-64-P9-NEXT:    mtlr r0
+; CHECK-AIX-64-P9-NEXT:    blr
+;
+; CHECK-AIX-32-P9-LABEL: memcpy_test:
+; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    mflr r0
+; CHECK-AIX-32-P9-NEXT:    stwu r1, -64(r1)
+; CHECK-AIX-32-P9-NEXT:    stw r0, 72(r1)
+; CHECK-AIX-32-P9-NEXT:    stw r31, 60(r1) # 4-byte Folded Spill
+; CHECK-AIX-32-P9-NEXT:    mr r31, r3
+; CHECK-AIX-32-P9-NEXT:    bl .___memmove[PR]
+; CHECK-AIX-32-P9-NEXT:    nop
+; CHECK-AIX-32-P9-NEXT:    mr r3, r31
+; CHECK-AIX-32-P9-NEXT:    lwz r31, 60(r1) # 4-byte Folded Reload
+; CHECK-AIX-32-P9-NEXT:    addi r1, r1, 64
+; CHECK-AIX-32-P9-NEXT:    lwz r0, 8(r1)
+; CHECK-AIX-32-P9-NEXT:    mtlr r0
+; CHECK-AIX-32-P9-NEXT:    blr
+entry:
+  tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 %destination, ptr align 1 %source, i32 %num, i1 false)
+  ret ptr %destination
+}
+
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
+
+define i32 @memcmp_test(ptr nocapture noundef readonly %ptr1, ptr nocapture noundef readonly %ptr2, i32 noundef %num) local_unnamed_addr {
+; CHECK-LE-P9-LABEL: memcmp_test:
+; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mflr r0
+; CHECK-LE-P9-NEXT:    stdu r1, -32(r1)
+; CHECK-LE-P9-NEXT:    std r0, 48(r1)
+; CHECK-LE-P9-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-LE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-LE-P9-NEXT:    bl memcmp
+; CHECK-LE-P9-NEXT:    nop
+; CHECK-LE-P9-NEXT:    addi r1, r1, 32
+; CHECK-LE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-LE-P9-NEXT:    mtlr r0
+; CHECK-LE-P9-NEXT:    blr
+;
+; CHECK-BE-P9-LABEL: memcmp_test:
+; CHECK-BE-P9:       # %bb.0: # %entry
+; CHECK-BE-P9-NEXT:    mflr r0
+; CHECK-BE-P9-NEXT:    stdu r1, -112(r1)
+; CHECK-BE-P9-NEXT:    std r0, 128(r1)
+; CHECK-BE-P9-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-BE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-BE-P9-NEXT:    bl memcmp
+; CHECK-BE-P9-NEXT:    nop
+; CHECK-BE-P9-NEXT:    addi r1, r1, 112
+; CHECK-BE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-BE-P9-NEXT:    mtlr r0
+; CHECK-BE-P9-NEXT:    blr
+;
+; CHECK-AIX-64-P9-LABEL: memcmp_test:
+; CHECK-AIX-64-P9:       # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT:    mflr r0
+; CHECK-AIX-64-P9-NEXT:    stdu r1, -112(r1)
+; CHECK-AIX-64-P9-NEXT:    std r0, 128(r1)
+; CHECK-AIX-64-P9-NEXT:    bl .___memcmp64[PR]
+; CHECK-AIX-64-P9-NEXT:    nop
+; CHECK-AIX-64-P9-NEXT:    addi r1, r1, 112
+; CHECK-AIX-64-P9-NEXT:    ld r0, 16(r1)
+; CHECK-AIX-64-P9-NEXT:    mtlr r0
+; CHECK-AIX-64-P9-NEXT:    blr
+;
+; CHECK-AIX-32-P9-LABEL: memcmp_test:
+; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    mflr r0
+; CHECK-AIX-32-P9-NEXT:    stwu r1, -64(r1)
+; CHECK-AIX-32-P9-NEXT:    stw r0, 72(r1)
+; CHECK-AIX-32-P9-NEXT:    bl .___memcmp[PR]
+; CHECK-AIX-32-P9-NEXT:    nop
+; CHECK-AIX-32-P9-NEXT:    addi r1, r1, 64
+; CHECK-AIX-32-P9-NEXT:    lwz r0, 8(r1)
+; CHECK-AIX-32-P9-NEXT:    mtlr r0
+; CHECK-AIX-32-P9-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.memcmp.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 %num)
+  ret i32 %0
+}
+
+declare i32 @llvm.memcmp.p0.p0.i32(ptr nocapture readonly, ptr nocapture readonly, i32)
+



More information about the cfe-commits mailing list