[clang] [llvm] [SelectionDAG][PowerPC] Add an intrinsic for memcmp. (PR #118178)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 30 08:13:25 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Stefan Pintilie (stefanp-ibm)
<details>
<summary>Changes</summary>
In the backend we want to provide special handling for the function `memcmp`. This patch adds an `llvm.memcmp` intrinsic so that the backend can recognize the call as more than just a regular function call.
This patch also adds special handling for PowerPC on AIX, where the library call is named `___memcmp` (32-bit) or `___memcmp64` (64-bit).
---
Full diff: https://github.com/llvm/llvm-project/pull/118178.diff
10 Files Affected:
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+13)
- (modified) clang/test/CodeGen/builtin-memfns.c (+35-18)
- (modified) clang/test/CodeGen/debug-info-extern-call.c (+5-5)
- (modified) llvm/include/llvm/CodeGen/SelectionDAG.h (+6)
- (modified) llvm/include/llvm/IR/Intrinsics.td (+6)
- (modified) llvm/include/llvm/IR/RuntimeLibcalls.def (+1)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+51)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+19-1)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+1)
- (added) llvm/test/CodeGen/PowerPC/memintr.ll (+151)
``````````diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cb9c23b8e0a0d0..0688f4064d788b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4483,6 +4483,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(nullptr);
}
+ case Builtin::BImemcmp: {
+ Address Src = EmitPointerWithAlignment(E->getArg(0));
+ Address Dst = EmitPointerWithAlignment(E->getArg(1));
+ Value *SizeVal = EmitScalarExpr(E->getArg(2));
+ llvm::Type *Tys[] = {Dst.getBasePointer()->getType(),
+ Src.getBasePointer()->getType(), SizeVal->getType()};
+
+ Function *F = CGM.getIntrinsic(Intrinsic::memcmp, Tys);
+ Value *Mem1Value = EmitScalarExpr(E->getArg(0));
+ Value *Mem2Value = EmitScalarExpr(E->getArg(1));
+ Value *Args[] = {Mem1Value, Mem2Value, SizeVal};
+ return RValue::get(Builder.CreateCall(F, Args));
+ }
case Builtin::BImemcpy:
case Builtin::BI__builtin_memcpy:
case Builtin::BImempcpy:
diff --git a/clang/test/CodeGen/builtin-memfns.c b/clang/test/CodeGen/builtin-memfns.c
index 23c3c60b779b37..51c5ad43cc7ef2 100644
--- a/clang/test/CodeGen/builtin-memfns.c
+++ b/clang/test/CodeGen/builtin-memfns.c
@@ -1,16 +1,23 @@
-// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm < %s| FileCheck %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK32
+// RUN: %clang_cc1 -triple ppc -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK32
+// RUN: %clang_cc1 -triple ppc64 -emit-llvm < %s| FileCheck %s --check-prefixes=CHECK,CHECK64
typedef __WCHAR_TYPE__ wchar_t;
typedef __SIZE_TYPE__ size_t;
void *memcpy(void *, void const *, size_t);
void *memccpy(void *, void const *, int, size_t);
-
-// CHECK: @test1
-// CHECK: call void @llvm.memset.p0.i32
-// CHECK: call void @llvm.memset.p0.i32
-// CHECK: call void @llvm.memcpy.p0.p0.i32
-// CHECK: call void @llvm.memmove.p0.p0.i32
+int memcmp(const void *, const void *, size_t);
+
+// CHECK-LABEL: @test1
+// CHECK32: call void @llvm.memset.p0.i32
+// CHECK64: call void @llvm.memset.p0.i64
+// CHECK32: call void @llvm.memset.p0.i32
+// CHECK64: call void @llvm.memset.p0.i64
+// CHECK32: call void @llvm.memcpy.p0.p0.i32
+// CHECK64: call void @llvm.memcpy.p0.p0.i64
+// CHECK32: call void @llvm.memmove.p0.p0.i32
+// CHECK64: call void @llvm.memmove.p0.p0.i64
// CHECK-NOT: __builtin
// CHECK: ret
int test1(int argc, char **argv) {
@@ -23,37 +30,38 @@ int test1(int argc, char **argv) {
return 0;
}
-// CHECK: @test2
-// CHECK: call void @llvm.memcpy.p0.p0.i32
+// CHECK-LABEL: @test2
+// CHECK32: call void @llvm.memcpy.p0.p0.i32
+// CHECK64: call void @llvm.memcpy.p0.p0.i64
char* test2(char* a, char* b) {
return __builtin_memcpy(a, b, 4);
}
-// CHECK: @test3
+// CHECK-LABEL: @test3
// CHECK: call void @llvm.memset
void test3(char *P) {
__builtin___memset_chk(P, 42, 128, 128);
}
-// CHECK: @test4
+// CHECK-LABEL: @test4
// CHECK: call void @llvm.memcpy
void test4(char *P, char *Q) {
__builtin___memcpy_chk(P, Q, 128, 128);
}
-// CHECK: @test5
+// CHECK-LABEL: @test5
// CHECK: call void @llvm.memmove
void test5(char *P, char *Q) {
__builtin___memmove_chk(P, Q, 128, 128);
}
-// CHECK: @test6
+// CHECK-LABEL: @test6
// CHECK: call void @llvm.memcpy
int test6(char *X) {
return __builtin___memcpy_chk(X, X, 42, 42) != 0;
}
-// CHECK: @test7
+// CHECK-LABEL: @test7
// PR12094
int test7(int *p) {
struct snd_pcm_hw_params_t* hwparams; // incomplete type.
@@ -75,14 +83,14 @@ struct PS {
} __attribute__((packed));
struct PS ps;
void test8(int *arg) {
- // CHECK: @test8
+ // CHECK-LABEL: @test8
// CHECK: call void @llvm.memcpy{{.*}} align 4 {{.*}} align 1 {{.*}} 16, i1 false)
__builtin_memcpy(arg, ps.modes, sizeof(struct PS));
}
__attribute((aligned(16))) int x[4], y[4];
void test9(void) {
- // CHECK: @test9
+ // CHECK-LABEL: @test9
// CHECK: call void @llvm.memcpy{{.*}} align 16 {{.*}} align 16 {{.*}} 16, i1 false)
__builtin_memcpy(x, y, sizeof(y));
}
@@ -93,10 +101,12 @@ wchar_t src;
// CHECK-LABEL: @test10
// FIXME: Consider lowering these to llvm.memcpy / llvm.memmove.
void test10(void) {
- // CHECK: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+ // CHECK32: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+ // CHECK64: call ptr @wmemcpy(ptr noundef @dest, ptr noundef @src, i64 noundef 4)
__builtin_wmemcpy(&dest, &src, 4);
- // CHECK: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+ // CHECK32: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i32 noundef 4)
+ // CHECK64: call ptr @wmemmove(ptr noundef @dest, ptr noundef @src, i64 noundef 4)
__builtin_wmemmove(&dest, &src, 4);
}
@@ -122,3 +132,10 @@ void test13(char *d, char *s, int c, size_t n) {
// CHECK: call ptr @memccpy
memccpy(d, s, c, n);
}
+
+// CHECK-LABEL: @test14
+int test14(const void * ptr1, const void * ptr2, size_t num) {
+ // CHECK32: call i32 @llvm.memcmp.p0.p0.i32
+ // CHECK64: call i32 @llvm.memcmp.p0.p0.i64
+ return memcmp(ptr1, ptr2, num);
+}
diff --git a/clang/test/CodeGen/debug-info-extern-call.c b/clang/test/CodeGen/debug-info-extern-call.c
index 0d18dc436040fa..10bd64baebb377 100644
--- a/clang/test/CodeGen/debug-info-extern-call.c
+++ b/clang/test/CodeGen/debug-info-extern-call.c
@@ -29,22 +29,22 @@
// DECLS-FOR-EXTERN: [[FN1_TYPES]] = !{[[X_TYPE:![0-9]+]],
// DECLS-FOR-EXTERN: [[X_TYPE]] = !DIDerivedType(tag: DW_TAG_typedef, name: "x",
// DECLS-FOR-EXTERN-SAME: baseType: [[INT_TYPE]])
-// DECLS-FOR-EXTERN: !DISubprogram(name: "memcmp"
+// DECLS-FOR-EXTERN: !DISubprogram(name: "strcmp"
// DECLS-FOR-EXTERN: !DISubprogram(name: "__some_reserved_name"
// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "fn1"
-// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "memcmp"
+// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "strcmp"
// NO-DECLS-FOR-EXTERN-NOT: !DISubprogram(name: "__some_reserved_name"
typedef int x;
extern x fn1(int a, int b);
-extern int memcmp(const void *s1, const void *s2, unsigned long n);
+extern int strcmp(const char *s1, const char *s2);
extern void __some_reserved_name(void);
-int fn2 (int *src, int *dst) {
+int fn2 (char *src, char *dst) {
int x = 4, y = 5;
int res = fn1(x, y);
- int res2 = memcmp(dst, src, res);
+ int res2 = strcmp(dst, src);
__some_reserved_name();
return res + res2;
}
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 2e3507386df309..c5c68edaa28ea0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1190,6 +1190,12 @@ class SelectionDAG {
/// stack arguments from being clobbered.
SDValue getStackArgumentTokenFactor(SDValue Chain);
+ std::pair<SDValue, SDValue> getMemcmp(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Align Alignment, bool isVol,
+ bool AlwaysInline, const CallInst *CI,
+ std::optional<bool> OverrideTailCall);
+
/* \p CI if not null is the memset call being lowered.
* \p OverrideTailCall is an optional parameter that can be used to override
* the tail call optimization decision. */
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 1ca8c2565ab0b6..62c9071ddf0e59 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -967,6 +967,12 @@ def int_memcpy : Intrinsic<[],
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
ImmArg<ArgIndex<3>>]>;
+def int_memcmp
+ : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty],
+ [IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
+ NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
+ ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>]>;
+
// Memcpy semantic that is guaranteed to be inlined.
// In particular this means that the generated code is not allowed to call any
// external function.
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 13a27b58b9cd78..fa6dad96e59a68 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -498,6 +498,7 @@ HANDLE_LIBCALL(UO_F128, "__unordtf2")
HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord")
// Memory
+HANDLE_LIBCALL(MEMCMP, "memcmp")
HANDLE_LIBCALL(MEMCPY, "memcpy")
HANDLE_LIBCALL(MEMMOVE, "memmove")
HANDLE_LIBCALL(MEMSET, "memset")
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 182529123ec6d8..b2e69586f62087 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8497,6 +8497,57 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
}
}
+std::pair<SDValue, SDValue>
+SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0,
+ SDValue Mem1, SDValue Size, Align Alignment, bool isVol,
+ bool AlwaysInline, const CallInst *CI,
+ std::optional<bool> OverrideTailCall) {
+
+ // TODO: Add special case for situation where size is known.
+ // TODO: Add hooks for target-specific code.
+ // TODO: Always inline not yet supported.
+ assert(!AlwaysInline && "Always inline for memcmp is not yet supported.");
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = PointerType::getUnqual(*getContext());
+ Entry.Node = Mem0;
+ Args.push_back(Entry);
+ Entry.Node = Mem1;
+ Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ bool IsTailCall = false;
+ if (OverrideTailCall.has_value()) {
+ IsTailCall = *OverrideTailCall;
+ } else {
+ bool LowersToMemcmp =
+ TLI->getLibcallName(RTLIB::MEMCMP) == StringRef("memcmp");
+ bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI);
+ IsTailCall = CI && CI->isTailCall() &&
+ isInTailCallPosition(*CI, getTarget(),
+ ReturnsFirstArg && LowersToMemcmp);
+ }
+
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCMP),
+ Type::getInt32Ty(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCMP),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setTailCall(IsTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult;
+}
+
SDValue SelectionDAG::getMemcpy(
SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size,
Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38a3e9b91052d..f0b2d610892120 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6527,6 +6527,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
RegName, getValue(RegValue)));
return;
}
+ case Intrinsic::memcmp: {
+ const auto &CallI = cast<CallInst>(I);
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+
+ Align Mem0Align = CallI.getParamAlign(0).valueOrOne();
+ Align Mem1Align = CallI.getParamAlign(1).valueOrOne();
+ Align Alignment = std::min(Mem0Align, Mem1Align);
+ bool isVol = CallI.isVolatile();
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+
+ std::pair<SDValue, SDValue> MC =
+ DAG.getMemcmp(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+ /* AlwaysInline */ false, &I, std::nullopt);
+ setValue(&I, MC.first);
+ updateDAGForMaybeTailCall(MC.second);
+ return;
+ }
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
@@ -9351,7 +9370,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
visitInlineAsm(I);
return;
}
-
diagnoseDontCall(I);
if (Function *F = I.getCalledFunction()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e917ef3f5e8c9a..500582c124eb27 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1442,6 +1442,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::FREXP_F128, "frexpf128");
if (Subtarget.isAIXABI()) {
+ setLibcallName(RTLIB::MEMCMP, isPPC64 ? "___memcmp64" : "___memcmp");
setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
diff --git a/llvm/test/CodeGen/PowerPC/memintr.ll b/llvm/test/CodeGen/PowerPC/memintr.ll
new file mode 100644
index 00000000000000..5fbe7234b06221
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/memintr.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+
+define noundef ptr @memcpy_test(ptr noundef returned writeonly %destination, ptr nocapture noundef readonly %source, i32 noundef %num) local_unnamed_addr {
+; CHECK-LE-P9-LABEL: memcpy_test:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mflr r0
+; CHECK-LE-P9-NEXT: .cfi_def_cfa_offset 48
+; CHECK-LE-P9-NEXT: .cfi_offset lr, 16
+; CHECK-LE-P9-NEXT: .cfi_offset r30, -16
+; CHECK-LE-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-LE-P9-NEXT: stdu r1, -48(r1)
+; CHECK-LE-P9-NEXT: std r0, 64(r1)
+; CHECK-LE-P9-NEXT: mr r30, r3
+; CHECK-LE-P9-NEXT: bl memcpy
+; CHECK-LE-P9-NEXT: nop
+; CHECK-LE-P9-NEXT: mr r3, r30
+; CHECK-LE-P9-NEXT: addi r1, r1, 48
+; CHECK-LE-P9-NEXT: ld r0, 16(r1)
+; CHECK-LE-P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-LE-P9-NEXT: mtlr r0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: memcpy_test:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mflr r0
+; CHECK-BE-P9-NEXT: stdu r1, -128(r1)
+; CHECK-BE-P9-NEXT: std r0, 144(r1)
+; CHECK-BE-P9-NEXT: .cfi_def_cfa_offset 128
+; CHECK-BE-P9-NEXT: .cfi_offset lr, 16
+; CHECK-BE-P9-NEXT: .cfi_offset r30, -16
+; CHECK-BE-P9-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; CHECK-BE-P9-NEXT: mr r30, r3
+; CHECK-BE-P9-NEXT: bl memcpy
+; CHECK-BE-P9-NEXT: nop
+; CHECK-BE-P9-NEXT: mr r3, r30
+; CHECK-BE-P9-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; CHECK-BE-P9-NEXT: addi r1, r1, 128
+; CHECK-BE-P9-NEXT: ld r0, 16(r1)
+; CHECK-BE-P9-NEXT: mtlr r0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: memcpy_test:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mflr r0
+; CHECK-AIX-64-P9-NEXT: stdu r1, -128(r1)
+; CHECK-AIX-64-P9-NEXT: std r0, 144(r1)
+; CHECK-AIX-64-P9-NEXT: std r31, 120(r1) # 8-byte Folded Spill
+; CHECK-AIX-64-P9-NEXT: mr r31, r3
+; CHECK-AIX-64-P9-NEXT: bl .___memmove64[PR]
+; CHECK-AIX-64-P9-NEXT: nop
+; CHECK-AIX-64-P9-NEXT: mr r3, r31
+; CHECK-AIX-64-P9-NEXT: ld r31, 120(r1) # 8-byte Folded Reload
+; CHECK-AIX-64-P9-NEXT: addi r1, r1, 128
+; CHECK-AIX-64-P9-NEXT: ld r0, 16(r1)
+; CHECK-AIX-64-P9-NEXT: mtlr r0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: memcpy_test:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: mflr r0
+; CHECK-AIX-32-P9-NEXT: stwu r1, -64(r1)
+; CHECK-AIX-32-P9-NEXT: stw r0, 72(r1)
+; CHECK-AIX-32-P9-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
+; CHECK-AIX-32-P9-NEXT: mr r31, r3
+; CHECK-AIX-32-P9-NEXT: bl .___memmove[PR]
+; CHECK-AIX-32-P9-NEXT: nop
+; CHECK-AIX-32-P9-NEXT: mr r3, r31
+; CHECK-AIX-32-P9-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload
+; CHECK-AIX-32-P9-NEXT: addi r1, r1, 64
+; CHECK-AIX-32-P9-NEXT: lwz r0, 8(r1)
+; CHECK-AIX-32-P9-NEXT: mtlr r0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ tail call void @llvm.memcpy.p0.p0.i32(ptr align 1 %destination, ptr align 1 %source, i32 %num, i1 false)
+ ret ptr %destination
+}
+
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
+
+define i32 @memcmp_test(ptr nocapture noundef readonly %ptr1, ptr nocapture noundef readonly %ptr2, i32 noundef %num) local_unnamed_addr {
+; CHECK-LE-P9-LABEL: memcmp_test:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mflr r0
+; CHECK-LE-P9-NEXT: stdu r1, -32(r1)
+; CHECK-LE-P9-NEXT: std r0, 48(r1)
+; CHECK-LE-P9-NEXT: .cfi_def_cfa_offset 32
+; CHECK-LE-P9-NEXT: .cfi_offset lr, 16
+; CHECK-LE-P9-NEXT: bl memcmp
+; CHECK-LE-P9-NEXT: nop
+; CHECK-LE-P9-NEXT: addi r1, r1, 32
+; CHECK-LE-P9-NEXT: ld r0, 16(r1)
+; CHECK-LE-P9-NEXT: mtlr r0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: memcmp_test:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mflr r0
+; CHECK-BE-P9-NEXT: stdu r1, -112(r1)
+; CHECK-BE-P9-NEXT: std r0, 128(r1)
+; CHECK-BE-P9-NEXT: .cfi_def_cfa_offset 112
+; CHECK-BE-P9-NEXT: .cfi_offset lr, 16
+; CHECK-BE-P9-NEXT: bl memcmp
+; CHECK-BE-P9-NEXT: nop
+; CHECK-BE-P9-NEXT: addi r1, r1, 112
+; CHECK-BE-P9-NEXT: ld r0, 16(r1)
+; CHECK-BE-P9-NEXT: mtlr r0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: memcmp_test:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mflr r0
+; CHECK-AIX-64-P9-NEXT: stdu r1, -112(r1)
+; CHECK-AIX-64-P9-NEXT: std r0, 128(r1)
+; CHECK-AIX-64-P9-NEXT: bl .___memcmp64[PR]
+; CHECK-AIX-64-P9-NEXT: nop
+; CHECK-AIX-64-P9-NEXT: addi r1, r1, 112
+; CHECK-AIX-64-P9-NEXT: ld r0, 16(r1)
+; CHECK-AIX-64-P9-NEXT: mtlr r0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: memcmp_test:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: mflr r0
+; CHECK-AIX-32-P9-NEXT: stwu r1, -64(r1)
+; CHECK-AIX-32-P9-NEXT: stw r0, 72(r1)
+; CHECK-AIX-32-P9-NEXT: bl .___memcmp[PR]
+; CHECK-AIX-32-P9-NEXT: nop
+; CHECK-AIX-32-P9-NEXT: addi r1, r1, 64
+; CHECK-AIX-32-P9-NEXT: lwz r0, 8(r1)
+; CHECK-AIX-32-P9-NEXT: mtlr r0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.memcmp.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 %num)
+ ret i32 %0
+}
+
+declare i32 @llvm.memcmp.p0.p0.i32(ptr nocapture readonly, ptr nocapture readonly, i32)
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/118178
More information about the llvm-commits mailing list