[clang] [llvm] [SelectionDAG][PowerPC] Add an intrinsic for memcmp. (PR #118178)

via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 30 08:13:25 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: Stefan Pintilie (stefanp-ibm)

<details>
<summary>Changes</summary>

In the backend we want to provide special handling for the function memcmp. This patch adds an intrinsic so that the backend will recognize it as more than just a regular function call.

This patch also adds special handling for PowerPC on AIX.

---
Full diff: https://github.com/llvm/llvm-project/pull/118178.diff


10 Files Affected:

- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+13) 
- (modified) clang/test/CodeGen/builtin-memfns.c (+35-18) 
- (modified) clang/test/CodeGen/debug-info-extern-call.c (+5-5) 
- (modified) llvm/include/llvm/CodeGen/SelectionDAG.h (+6) 
- (modified) llvm/include/llvm/IR/Intrinsics.td (+6) 
- (modified) llvm/include/llvm/IR/RuntimeLibcalls.def (+1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+51) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+19-1) 
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+1) 
- (added) llvm/test/CodeGen/PowerPC/memintr.ll (+151) 


``````````diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cb9c23b8e0a0d0..0688f4064d788b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4483,6 +4483,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(nullptr);
   }
 
+  case Builtin::BImemcmp: {
+    Address Src = EmitPointerWithAlignment(E->getArg(0));
+    Address Dst = EmitPointerWithAlignment(E->getArg(1));
+    Value *SizeVal = EmitScalarExpr(E->getArg(2));
+    llvm::Type *Tys[] = {Dst.getBasePointer()->getType(),
+                         Src.getBasePointer()->getType(), SizeVal->getType()};
+
+    Function *F = CGM.getIntrinsic(Intrinsic::memcmp, Tys);
+    Value *Mem1Value = EmitScalarExpr(E->getArg(0));
+    Value *Mem2Value = EmitScalarExpr(E->getArg(1));
+    Value *Args[] = {Mem1Value, Mem2Value, SizeVal};
+    return RValue::get(Builder.CreateCall(F, Args));
+  }
   case Builtin::BImemcpy:
   case Builtin::BI__builtin_memcpy:
   case Builtin::BImempcpy:
diff --git a/clang/test/CodeGen/builtin-memfns.c b/clang/test/CodeGen/builtin-memfns.c
index 23c3c60b779b37..51c5ad43cc7ef2 100644
--- a/clang/test/CodeGen/builtin-memfns.c
+++ b/clang/test/CodeGen/builtin-memfns.c
@@ -1,16 +1,23 @@
-// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm < %s| FileCheck %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK32
+// RUN: %clang_cc1 -triple ppc -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK32
+// RUN: %clang_cc1 -triple ppc64 -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK64
 
 typedef __WCHAR_TYPE__ wchar_t;
 typedef __SIZE_TYPE__ size_t;
 
 void *memcpy(void *, void const *, size_t);
 void *memccpy(void *, void const *, int, size_t);
-
-// CHECK: @test1
-// CHECK: call void @llvm.memset.p0.i32
-// CHECK: call void @llvm.memset.p0.i32
-// CHECK: call void @llvm.memcpy.p0.p0.i32
-// CHECK: call void @llvm.memmove.p0.p0.i32
+int memcmp(const void *, const void *, size_t);
+
+// CHECK-LABEL: @test1
+// CHECK32: call void @llvm.memset.p0.i32
+// CHECK64: call void @llvm.memset.p0.i64
+// CHECK32: call void @llvm.memset.p0.i32
+// CHECK64: call void @llvm.memset.p0.i64
+// CHECK32: call void @llvm.memcpy.p0.p0.i32
+// CHECK64: call void @llvm.memcpy.p0.p0.i64
+// CHECK32: call void @llvm.memmove.p0.p0.i32
+// CHECK64: call void @llvm.memmove.p0.p0.i64
 // CHECK-NOT: __builtin
 // CHECK: ret
 int test1(int argc, char **argv) {
@@ -23,37 +30,38 @@ int test1(int argc, char **argv) {
   return 0;
 }
 
-// CHECK: @test2
-// CHECK: call void @llvm.memcpy.p0.p0.i32
+// CHECK-LABEL: @test2
+// CHECK32: call void @llvm.memcpy.p0.p0.i32
+// CHECK64: call void @llvm.memcpy.p0.p0.i64
 char* test2(char* a, char* b) {
   return __builtin_memcpy(a, b, 4);
 }
 
-// CHECK: @test3
+// CHECK-LABEL: @test3
 // CHECK: call void @llvm.memset
 void test3(char *P) {
   __builtin___memset_chk(P, 42, 128, 128);
 }
 
-// CHECK: @test4
+// CHECK-LABEL: @test4
 // CHECK: call void @llvm.memcpy
 void test4(char *P, char *Q) {
   __builtin___memcpy_chk(P, Q, 128, 128);
 }
 
-// CHECK: @test5
+// CHECK-LABEL: @test5
 // CHECK: call void @llvm.memmove
 void test5(char *P, char *Q) {
   __builtin___memmove_chk(P, Q, 128, 128);
 }
 
-// CHECK: @test6
+// CHECK-LABEL: @test6
 // CHECK: call void @llvm.memcpy
 int test6(char *X) {
   return __builtin___memcpy_chk(X, X, 42, 42) != 0;
 }
 
-// CHECK: @test7
+// CHECK-LABEL: @test7
 // PR12094
 int test7(int *p) {
   struct snd_pcm_hw_params_t* hwparams;  // incomplete type.
@@ -75,14 +83,14 @@ struct PS {
 } __attribute__((packed));
 struct PS ps;
 void test8(int *arg) {
-  // CHECK: @test8
+  // CHECK-LABEL: @test8
   // CHECK: call void @llvm.memcpy{{.*}} align 4 {{.*}} align 1 {{.*}} 16, i1 false)
   __builtin_memcpy(arg, ps.modes, sizeof(struct PS));
 }
 
 __attribute((aligned(16))) int x[4], y[4];
 void test9(void) {
-  // CHECK: @test9
+  // CHECK-LABEL: @test9
   // CHECK: call void @llvm.memcpy{{.*}} align 16 {{.*}} align 16 {{.*}} 16, i1 false)
   __builtin_memcpy(x, y, sizeof(y));
 }
@@ -93,10 +101,12 @@ wchar_t src;
 // CHECK-LABEL: @test10
 // FIXME: Consider lowering these to llvm.memcpy / llvm.memmove.
 void test10(void) {
-  // CHECK: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK32: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK64: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i64 noundef 4)
   __builtin_wmemcpy(&dest, &src, 4);
 
-  // CHECK: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK32: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+  // CHECK64: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i64 noundef 4)
   __builtin_wmemmove(&dest, &src, 4);
 }
 
@@ -122,3 +132,10 @@ void test13(char *d, char *s, int c, size_t n) {
   // CHECK: call ptr @memccpy
   memccpy(d, s, c, n);
 }
+
+// CHECK-LABEL: @test14
+int test14(const void * ptr1, const void * ptr2, size_t num) {
+  // CHECK32: call i32 @llvm.memcmp.p0.p0.i32
+  // CHECK64: call i32 @llvm.memcmp.p0.p0.i64
+  return memcmp(ptr1, ptr2, num);
+}
diff --git a/clang/test/CodeGen/debug-info-extern-call.c b/clang/test/CodeGen/debug-info-extern-call.c
index 0d18dc436040fa..10bd64baebb377 100644
--- a/clang/test/CodeGen/debug-info-extern-call.c
+++ b/clang/test/CodeGen/debug-info-extern-call.c
@@ -29,22 +29,22 @@
 // DECLS-FOR-EXTERN: [[FN1_TYPES]] = !{[[X_TYPE:![0-9]+]],
 // DECLS-FOR-EXTERN: [[X_TYPE]] = !DIDerivedType(tag: DW_TAG_typedef, name: "x",
 // DECLS-FOR-EXTERN-SAME: baseType: [[INT_TYPE]])
-// DECLS-FOR-EXTERN: !DISubprogram(name: "memcmp"
+// DECLS-FOR-EXTERN: !DISubprogram(name: "strcmp"
 // DECLS-FOR-EXTERN: !DISubprogram(name: "__some_reserved_name"
 
 // NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "fn1"
-// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "memcmp"
+// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "strcmp"
 // NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "__some_reserved_name"
 
 typedef int x;
 extern x fn1(int a, int b);
-extern int memcmp(const void *s1, const void *s2, unsigned long n);
+extern int strcmp(const char *s1, const char *s2);
 extern void __some_reserved_name(void);
 
-int fn2 (int *src, int *dst) {
+int fn2 (char *src, char *dst) {
   int x = 4, y = 5;
   int res = fn1(x, y);
-  int res2 = memcmp(dst, src, res);
+  int res2 = strcmp(dst, src);
   __some_reserved_name();
   return res + res2;
 }
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 2e3507386df309..c5c68edaa28ea0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1190,6 +1190,12 @@ class SelectionDAG {
   /// stack arguments from being clobbered.
   SDValue getStackArgumentTokenFactor(SDValue Chain);
 
+  std::pair<SDValue, SDValue> getMemcmp(SDValue Chain, const SDLoc &dl,
+                                        SDValue Dst, SDValue Src, SDValue Size,
+                                        Align Alignment, bool isVol,
+                                        bool AlwaysInline, const CallInst *CI,
+                                        std::optional<bool> OverrideTailCall);
+
   /* \p CI if not null is the memset call being lowered.
    * \p OverrideTailCall is an optional parameter that can be used to override
    * the tail call optimization decision. */
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 1ca8c2565ab0b6..62c9071ddf0e59 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -967,6 +967,12 @@ def int_memcpy  : Intrinsic<[],
                              WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
                              ImmArg<ArgIndex<3>>]>;
 
+def int_memcmp
+    : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty],
+                [IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
+                 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
+                 ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
+
 // Memcpy semantic that is guaranteed to be inlined.
 // In particular this means that the generated code is not allowed to call any
 // external function.
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 13a27b58b9cd78..fa6dad96e59a68 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -498,6 +498,7 @@ HANDLE_LIBCALL(UO_F128, "__unordtf2")
 HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord")
 
 // Memory
+HANDLE_LIBCALL(MEMCMP, "memcmp")
 HANDLE_LIBCALL(MEMCPY, "memcpy")
 HANDLE_LIBCALL(MEMMOVE, "memmove")
 HANDLE_LIBCALL(MEMSET, "memset")
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 182529123ec6d8..b2e69586f62087 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8497,6 +8497,57 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
   }
 }
 
+std::pair<SDValue, SDValue>
+SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0,
+                        SDValue Mem1, SDValue Size, Align Alignment, bool isVol,
+                        bool AlwaysInline, const CallInst *CI,
+                        std::optional<bool> OverrideTailCall) {
+
+  // TODO: Add special case for situation where size is known.
+  // TODO: Add hooks for Target Specifc Code.
+  // TODO: Always inline not yet supported.
+  assert(!AlwaysInline && "Always inline for memcmp is not yet supported.");
+
+  // Emit a library call.
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = PointerType::getUnqual(*getContext());
+  Entry.Node = Mem0;
+  Args.push_back(Entry);
+  Entry.Node = Mem1;
+  Args.push_back(Entry);
+
+  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+  Entry.Node = Size;
+  Args.push_back(Entry);
+
+  // FIXME: pass in SDLoc
+  TargetLowering::CallLoweringInfo CLI(*this);
+  bool IsTailCall = false;
+  if (OverrideTailCall.has_value()) {
+    IsTailCall = *OverrideTailCall;
+  } else {
+    bool LowersToMemcmp =
+        TLI->getLibcallName(RTLIB::MEMCMP) == StringRef("memcmp");
+    bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI);
+    IsTailCall = CI && CI->isTailCall() &&
+                 isInTailCallPosition(*CI, getTarget(),
+                                      ReturnsFirstArg && LowersToMemcmp);
+  }
+
+  CLI.setDebugLoc(dl)
+      .setChain(Chain)
+      .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCMP),
+                    Type::getInt32Ty(*getContext()),
+                    getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCMP),
+                                      TLI->getPointerTy(getDataLayout())),
+                    std::move(Args))
+      .setTailCall(IsTailCall);
+
+  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+  return CallResult;
+}
+
 SDValue SelectionDAG::getMemcpy(
     SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size,
     Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38a3e9b91052d..f0b2d610892120 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6527,6 +6527,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                             RegName, getValue(RegValue)));
     return;
   }
+  case Intrinsic::memcmp: {
+    const auto &CallI = cast<CallInst>(I);
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    SDValue Op3 = getValue(I.getArgOperand(2));
+
+    Align Mem0Align = CallI.getParamAlign(0).valueOrOne();
+    Align Mem1Align = CallI.getParamAlign(1).valueOrOne();
+    Align Alignment = std::min(Mem0Align, Mem1Align);
+    bool isVol = CallI.isVolatile();
+    SDValue Root = isVol ? getRoot() : getMemoryRoot();
+
+    std::pair<SDValue, SDValue> MC =
+        DAG.getMemcmp(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+                      /* AlwaysInline */ false, &I, std::nullopt);
+    setValue(&I, MC.first);
+    updateDAGForMaybeTailCall(MC.second);
+    return;
+  }
   case Intrinsic::memcpy: {
     const auto &MCI = cast<MemCpyInst>(I);
     SDValue Op1 = getValue(I.getArgOperand(0));
@@ -9351,7 +9370,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
     visitInlineAsm(I);
     return;
   }
-
   diagnoseDontCall(I);
 
   if (Function *F = I.getCalledFunction()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e917ef3f5e8c9a..500582c124eb27 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1442,6 +1442,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setLibcallName(RTLIB::FREXP_F128, "frexpf128");
 
   if (Subtarget.isAIXABI()) {
+    setLibcallName(RTLIB::MEMCMP, isPPC64 ? "___memcmp64" : "___memcmp");
     setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
     setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
     setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
diff --git a/llvm/test/CodeGen/PowerPC/memintr.ll b/llvm/test/CodeGen/PowerPC/memintr.ll
new file mode 100644
index 00000000000000..5fbe7234b06221
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/memintr.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-BE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+
+define noundef ptr @memcpy_test(ptr noundef returned writeonly %destination, ptr nocapture noundef readonly %source, i32 noundef %num) local_unnamed_addr {
+; CHECK-LE-P9-LABEL: memcpy_test:
+; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mflr r0
+; CHECK-LE-P9-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-LE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-LE-P9-NEXT:    .cfi_offset r30, -16
+; CHECK-LE-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-LE-P9-NEXT:    stdu r1, -48(r1)
+; CHECK-LE-P9-NEXT:    std r0, 64(r1)
+; CHECK-LE-P9-NEXT:    mr r30, r3
+; CHECK-LE-P9-NEXT:    bl memcpy
+; CHECK-LE-P9-NEXT:    nop
+; CHECK-LE-P9-NEXT:    mr r3, r30
+; CHECK-LE-P9-NEXT:    addi r1, r1, 48
+; CHECK-LE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-LE-P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-LE-P9-NEXT:    mtlr r0
+; CHECK-LE-P9-NEXT:    blr
+;
+; CHECK-BE-P9-LABEL: memcpy_test:
+; CHECK-BE-P9:       # %bb.0: # %entry
+; CHECK-BE-P9-NEXT:    mflr r0
+; CHECK-BE-P9-NEXT:    stdu r1, -128(r1)
+; CHECK-BE-P9-NEXT:    std r0, 144(r1)
+; CHECK-BE-P9-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-BE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-BE-P9-NEXT:    .cfi_offset r30, -16
+; CHECK-BE-P9-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-BE-P9-NEXT:    mr r30, r3
+; CHECK-BE-P9-NEXT:    bl memcpy
+; CHECK-BE-P9-NEXT:    nop
+; CHECK-BE-P9-NEXT:    mr r3, r30
+; CHECK-BE-P9-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; CHECK-BE-P9-NEXT:    addi r1, r1, 128
+; CHECK-BE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-BE-P9-NEXT:    mtlr r0
+; CHECK-BE-P9-NEXT:    blr
+;
+; CHECK-AIX-64-P9-LABEL: memcpy_test:
+; CHECK-AIX-64-P9:       # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT:    mflr r0
+; CHECK-AIX-64-P9-NEXT:    stdu r1, -128(r1)
+; CHECK-AIX-64-P9-NEXT:    std r0, 144(r1)
+; CHECK-AIX-64-P9-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
+; CHECK-AIX-64-P9-NEXT:    mr r31, r3
+; CHECK-AIX-64-P9-NEXT:    bl .___memmove64[PR]
+; CHECK-AIX-64-P9-NEXT:    nop
+; CHECK-AIX-64-P9-NEXT:    mr r3, r31
+; CHECK-AIX-64-P9-NEXT:    ld r31, 120(r1) # 8-byte Folded Reload
+; CHECK-AIX-64-P9-NEXT:    addi r1, r1, 128
+; CHECK-AIX-64-P9-NEXT:    ld r0, 16(r1)
+; CHECK-AIX-64-P9-NEXT:    mtlr r0
+; CHECK-AIX-64-P9-NEXT:    blr
+;
+; CHECK-AIX-32-P9-LABEL: memcpy_test:
+; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    mflr r0
+; CHECK-AIX-32-P9-NEXT:    stwu r1, -64(r1)
+; CHECK-AIX-32-P9-NEXT:    stw r0, 72(r1)
+; CHECK-AIX-32-P9-NEXT:    stw r31, 60(r1) # 4-byte Folded Spill
+; CHECK-AIX-32-P9-NEXT:    mr r31, r3
+; CHECK-AIX-32-P9-NEXT:    bl .___memmove[PR]
+; CHECK-AIX-32-P9-NEXT:    nop
+; CHECK-AIX-32-P9-NEXT:    mr r3, r31
+; CHECK-AIX-32-P9-NEXT:    lwz r31, 60(r1) # 4-byte Folded Reload
+; CHECK-AIX-32-P9-NEXT:    addi r1, r1, 64
+; CHECK-AIX-32-P9-NEXT:    lwz r0, 8(r1)
+; CHECK-AIX-32-P9-NEXT:    mtlr r0
+; CHECK-AIX-32-P9-NEXT:    blr
+entry:
+  tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 %destination, ptr align 1 %source, i32 %num, i1 false)
+  ret ptr %destination
+}
+
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
+
+define i32 @memcmp_test(ptr nocapture noundef readonly %ptr1, ptr nocapture noundef readonly %ptr2, i32 noundef %num) local_unnamed_addr {
+; CHECK-LE-P9-LABEL: memcmp_test:
+; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mflr r0
+; CHECK-LE-P9-NEXT:    stdu r1, -32(r1)
+; CHECK-LE-P9-NEXT:    std r0, 48(r1)
+; CHECK-LE-P9-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-LE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-LE-P9-NEXT:    bl memcmp
+; CHECK-LE-P9-NEXT:    nop
+; CHECK-LE-P9-NEXT:    addi r1, r1, 32
+; CHECK-LE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-LE-P9-NEXT:    mtlr r0
+; CHECK-LE-P9-NEXT:    blr
+;
+; CHECK-BE-P9-LABEL: memcmp_test:
+; CHECK-BE-P9:       # %bb.0: # %entry
+; CHECK-BE-P9-NEXT:    mflr r0
+; CHECK-BE-P9-NEXT:    stdu r1, -112(r1)
+; CHECK-BE-P9-NEXT:    std r0, 128(r1)
+; CHECK-BE-P9-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-BE-P9-NEXT:    .cfi_offset lr, 16
+; CHECK-BE-P9-NEXT:    bl memcmp
+; CHECK-BE-P9-NEXT:    nop
+; CHECK-BE-P9-NEXT:    addi r1, r1, 112
+; CHECK-BE-P9-NEXT:    ld r0, 16(r1)
+; CHECK-BE-P9-NEXT:    mtlr r0
+; CHECK-BE-P9-NEXT:    blr
+;
+; CHECK-AIX-64-P9-LABEL: memcmp_test:
+; CHECK-AIX-64-P9:       # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT:    mflr r0
+; CHECK-AIX-64-P9-NEXT:    stdu r1, -112(r1)
+; CHECK-AIX-64-P9-NEXT:    std r0, 128(r1)
+; CHECK-AIX-64-P9-NEXT:    bl .___memcmp64[PR]
+; CHECK-AIX-64-P9-NEXT:    nop
+; CHECK-AIX-64-P9-NEXT:    addi r1, r1, 112
+; CHECK-AIX-64-P9-NEXT:    ld r0, 16(r1)
+; CHECK-AIX-64-P9-NEXT:    mtlr r0
+; CHECK-AIX-64-P9-NEXT:    blr
+;
+; CHECK-AIX-32-P9-LABEL: memcmp_test:
+; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    mflr r0
+; CHECK-AIX-32-P9-NEXT:    stwu r1, -64(r1)
+; CHECK-AIX-32-P9-NEXT:    stw r0, 72(r1)
+; CHECK-AIX-32-P9-NEXT:    bl .___memcmp[PR]
+; CHECK-AIX-32-P9-NEXT:    nop
+; CHECK-AIX-32-P9-NEXT:    addi r1, r1, 64
+; CHECK-AIX-32-P9-NEXT:    lwz r0, 8(r1)
+; CHECK-AIX-32-P9-NEXT:    mtlr r0
+; CHECK-AIX-32-P9-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.memcmp.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 %num)
+  ret i32 %0
+}
+
+declare i32 @llvm.memcmp.p0.p0.i32(ptr nocapture readonly, ptr nocapture readonly, i32)
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/118178


More information about the llvm-commits mailing list