[llvm] fd22614 - [AIX] Lower some memory intrinsics to millicode functions on AIX
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 20 19:26:25 PST 2023
Author: esmeyi
Date: 2023-02-20T22:25:49-05:00
New Revision: fd226142fcff9441f58ab87b6d26b36765af2c81
URL: https://github.com/llvm/llvm-project/commit/fd226142fcff9441f58ab87b6d26b36765af2c81
DIFF: https://github.com/llvm/llvm-project/commit/fd226142fcff9441f58ab87b6d26b36765af2c81.diff
LOG: [AIX] Lower some memory intrinsics to millicode functions on AIX
Summary: Currently we lower MEMCPY/MEMMOVE/MEMSET/BZERO to the corresponding libc functions, and on AIX those libc functions in turn call the millicode functions. We can lower these intrinsics directly to the millicode functions to save one call layer.
Reviewed By: shchenz
Differential Revision: https://reviews.llvm.org/D143997
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4bcc287d8d17a..80bd3e0bfe4e2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1413,6 +1413,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
setLibcallName(RTLIB::FMA_F128, "fmaf128");
+ if (Subtarget.isAIXABI()) {
+ setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
+ setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
+ setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
+ setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
+ }
+
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
if (Subtarget.useCRBits()) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
index 5135c6d93bb58..77f3abb4ba215 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
@@ -87,7 +87,7 @@ entry:
; 32BIT-DAG: $r3 = COPY %0
; 32BIT-DAG: $r4 = COPY %1
; 32BIT-DAG: $r5 = COPY %2
-; 32BIT-NEXT: BL_NOP &".memcpy[PR]", csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
+; 32BIT-NEXT: BL_NOP &".___memmove[PR]", csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
; 32BIT: ADJCALLSTACKDOWN 312, 0, implicit-def dead $r1, implicit $r1
; 32BIT-DAG: $r3 = COPY %{{[0-9]+}}
@@ -108,7 +108,7 @@ entry:
; ASM32BIT-DAG: lwz 4, L..C{{[0-9]+}}(2)
; ASM32BIT-DAG: li 5, 256
; ASM32BIT-DAG: stw 0, 328(1)
-; ASM32BIT-NEXT: bl .memcpy[PR]
+; ASM32BIT-NEXT: bl .___memmove[PR]
; ASM32BIT: bl .test_byval_mem2
; ASM32BIT: addi 1, 1, 320
@@ -120,7 +120,7 @@ entry:
; 64BIT-DAG: $x3 = COPY %0
; 64BIT-DAG: $x4 = COPY %1
; 64BIT-DAG: $x5 = COPY %2
-; 64BIT-NEXT: BL8_NOP &".memcpy[PR]", csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; 64BIT-NEXT: BL8_NOP &".___memmove64[PR]", csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT: ADJCALLSTACKDOWN 368, 0, implicit-def dead $r1, implicit $r1
; 64BIT-DAG: $x3 = COPY %{{[0-9]+}}
@@ -139,7 +139,7 @@ entry:
; ASM64BIT-DAG: ld 4, L..C{{[0-9]+}}(2)
; ASM64BIT-DAG: li 5, 256
; ASM64BIT-DAG: std 0, 384(1)
-; ASM64BIT-NEXT: bl .memcpy[PR]
+; ASM64BIT-NEXT: bl .___memmove64[PR]
; ASM64BIT: bl .test_byval_mem2
; ASM64BIT: addi 1, 1, 368
@@ -188,7 +188,7 @@ entry:
; 32BIT-DAG: $r3 = COPY %2
; 32BIT-DAG: $r4 = COPY %1
; 32BIT-DAG: $r5 = COPY %3
-; 32BIT-NEXT: BL_NOP &".memcpy[PR]", csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
+; 32BIT-NEXT: BL_NOP &".___memmove[PR]", csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
; 32BIT: ADJCALLSTACKDOWN 92, 0, implicit-def dead $r1, implicit $r1
; 32BIT-DAG: $r3 = COPY %{{[0-9]+}}
@@ -209,7 +209,7 @@ entry:
; ASM32BIT-DAG: addi 3, 1, 56
; ASM32BIT-DAG: addi 4, [[REG]], 24
; ASM32BIT-DAG: li 5, 33
-; ASM32BIT-NEXT: bl .memcpy[PR]
+; ASM32BIT-NEXT: bl .___memmove[PR]
; ASM32BIT-DAG: lwz 5, 0([[REG]])
; ASM32BIT-DAG: lwz 6, 4([[REG]])
; ASM32BIT-DAG: lwz 7, 8([[REG]])
@@ -306,7 +306,7 @@ entry:
; 32BIT-DAG: $r3 = COPY %3
; 32BIT-DAG: $r4 = COPY %4
; 32BIT-DAG: $r5 = COPY %5
-; 32BIT-NEXT: BL_NOP &".memcpy[PR]", csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
+; 32BIT-NEXT: BL_NOP &".___memmove[PR]", csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3
; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
; 32BIT: ADJCALLSTACKDOWN 316, 0, implicit-def dead $r1, implicit $r1
; 32BIT-DAG: $r3 = COPY %{{[0-9]+}}
@@ -331,7 +331,7 @@ entry:
; ASM32BIT-DAG: addi 3, 1, 60
; ASM32BIT-DAG: lwz 4, L..C{{[0-9]+}}(2)
; ASM32BIT-DAG: li 5, 256
-; ASM32BIT-NEXT: bl .memcpy[PR]
+; ASM32BIT-NEXT: bl .___memmove[PR]
; ASM32BIT-DAG: lwz 4, 0([[REG1]])
; ASM32BIT-DAG: lwz 5, 4([[REG1]])
; ASM32BIT-DAG: lwz 6, 8([[REG1]])
@@ -351,7 +351,7 @@ entry:
; 64BIT-DAG: $x3 = COPY %2
; 64BIT-DAG: $x4 = COPY %1
; 64BIT-DAG: $x5 = COPY %3
-; 64BIT-NEXT: BL8_NOP &".memcpy[PR]", csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
+; 64BIT-NEXT: BL8_NOP &".___memmove64[PR]", csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x2, implicit-def $r1, implicit-def $x3
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT: ADJCALLSTACKDOWN 344, 0, implicit-def dead $r1, implicit $r1
; 64BIT-DAG: $x3 = COPY %{{[0-9]+}}
@@ -370,7 +370,7 @@ entry:
; ASM64BIT-DAG: addi 3, 1, 112
; ASM64BIT-DAG: addi 4, [[REG1]], 24
; ASM64BIT-DAG: li 5, 232
-; ASM64BIT-NEXT: bl .memcpy[PR]
+; ASM64BIT-NEXT: bl .___memmove64[PR]
; ASM64BIT-DAG: ld [[REG2:[0-9]+]], L..C{{[0-9]+}}(2)
; ASM64BIT-DAG: ld 4, 0([[REG2]])
; ASM64BIT-DAG: ld 5, 8([[REG2]])
diff --git a/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll b/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
index 920c0a4b5ba31..b4b695138376d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
@@ -1,8 +1,8 @@
; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 -mattr=-altivec < %s | \
-; RUN: FileCheck %s
+; RUN: FileCheck --check-prefix=CHECK32 %s
; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr4 -mattr=-altivec < %s | \
-; RUN: FileCheck %s
+; RUN: FileCheck --check-prefix=CHECK64 %s
; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \
; RUN: -mattr=-altivec -filetype=obj -o %t.o < %s
@@ -33,9 +33,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr 0
-; CHECK: bl .memset
-
-; CHECK: .extern .memset
+; CHECK32: bl .___memset
+; CHECK32: .extern .___memset
+; CHECK64: bl .___memset64
+; CHECK64: .extern .___memset64
; CHECKSYM: Symbol {
; CHECKSYM-NEXT: Index: 0
@@ -49,7 +50,8 @@ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
; CHECKSYM-NEXT: }
; CHECKSYM-NEXT: Symbol {
; CHECKSYM-NEXT: Index: 1
-; CHECKSYM-NEXT: Name: .memset
+; CHECKSYM32-NEXT: Name: .___memset
+; CHECKSYM64-NEXT: Name: .___memset64
; CHECKSYM-NEXT: Value (RelocatableAddress): 0x0
; CHECKSYM-NEXT: Section: N_UNDEF
; CHECKSYM-NEXT: Type: 0x0
@@ -83,5 +85,5 @@ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
; CHECKRELOC-NEXT: 10: 80 83 00 04 lwz 4, 4(3)
; CHECKRELOC-NEXT: 14: 7c 85 23 78 mr 5, 4
; CHECKRELOC-NEXT: 18: 4b ff ff e9 bl 0x0
-; CHECKRELOC32-NEXT: 00000018: R_RBR (idx: 1) .memset[PR]
-; CHECKRELOC64-NEXT: 0000000000000018: R_RBR (idx: 1) .memset[PR]
+; CHECKRELOC32-NEXT: 00000018: R_RBR (idx: 1) .___memset[PR]
+; CHECKRELOC64-NEXT: 0000000000000018: R_RBR (idx: 1) .___memset64[PR]
diff --git a/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll b/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
index 26c1e7e0ace02..6891637b8fcbf 100644
--- a/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
@@ -38,6 +38,25 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
; CHECK-NOT: .extern .memcpy
+; 32-SYM: Symbol {{[{][[:space:]] *}}Index: [[#Index:]]{{[[:space:]] *}}Name: .___memmove
+; 32-SYM-NEXT: Value (RelocatableAddress): 0x0
+; 32-SYM-NEXT: Section: N_UNDEF
+; 32-SYM-NEXT: Type: 0x0
+; 32-SYM-NEXT: StorageClass: C_EXT (0x2)
+; 32-SYM-NEXT: NumberOfAuxEntries: 1
+; 32-SYM-NEXT: CSECT Auxiliary Entry {
+; 32-SYM-NEXT: Index: 2
+; 32-SYM-NEXT: SectionLen: 0
+; 32-SYM-NEXT: ParameterHashIndex: 0x0
+; 32-SYM-NEXT: TypeChkSectNum: 0x0
+; 32-SYM-NEXT: SymbolAlignmentLog2: 0
+; 32-SYM-NEXT: SymbolType: XTY_ER (0x0)
+; 32-SYM-NEXT: StorageMappingClass: XMC_PR (0x0)
+; 32-SYM-NEXT: StabInfoIndex: 0x0
+; 32-SYM-NEXT: StabSectNum: 0x0
+; 32-SYM-NEXT: }
+; 32-SYM-NEXT: }
+
; 32-SYM: Symbol {{[{][[:space:]] *}}Index: [[#Index:]]{{[[:space:]] *}}Name: .memcpy
; 32-SYM-NEXT: Value (RelocatableAddress): 0x0
; 32-SYM-NEXT: Section: .text
@@ -45,8 +64,8 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
; 32-SYM-NEXT: StorageClass: C_EXT (0x2)
; 32-SYM-NEXT: NumberOfAuxEntries: 1
; 32-SYM-NEXT: CSECT Auxiliary Entry {
-; 32-SYM-NEXT: Index: 4
-; 32-SYM-NEXT: ContainingCsectSymbolIndex: 1
+; 32-SYM-NEXT: Index: 6
+; 32-SYM-NEXT: ContainingCsectSymbolIndex: 3
; 32-SYM-NEXT: ParameterHashIndex: 0x0
; 32-SYM-NEXT: TypeChkSectNum: 0x0
; 32-SYM-NEXT: SymbolAlignmentLog2: 0
@@ -60,10 +79,20 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
; 32-SYM-NOT: .memcpy
; 32-REL: Relocations [
+; 32-REL-NEXT: Section (index: 1) .text {
+; 32-REL-NEXT: Relocation {
+; 32-REL-NEXT: Virtual Address: 0x1C
+; 32-REL-NEXT: Symbol: .___memmove (1)
+; 32-REL-NEXT: IsSigned: Yes
+; 32-REL-NEXT: FixupBitValue: 0
+; 32-REL-NEXT: Length: 26
+; 32-REL-NEXT: Type: R_RBR (0x1A)
+; 32-REL-NEXT: }
+; 32-REL-NEXT:}
; 32-REL-NEXT: Section (index: 2) .data {
; 32-REL-NEXT: Relocation {
; 32-REL-NEXT: Virtual Address: 0x34
-; 32-REL-NEXT: Symbol: .memcpy (3)
+; 32-REL-NEXT: Symbol: .memcpy (5)
; 32-REL-NEXT: IsSigned: No
; 32-REL-NEXT: FixupBitValue: 0
; 32-REL-NEXT: Length: 32
@@ -71,7 +100,7 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
; 32-REL-NEXT: }
; 32-REL-NEXT: Relocation {
; 32-REL-NEXT: Virtual Address: 0x38
-; 32-REL-NEXT: Symbol: TOC (11)
+; 32-REL-NEXT: Symbol: TOC (13)
; 32-REL-NEXT: IsSigned: No
; 32-REL-NEXT: FixupBitValue: 0
; 32-REL-NEXT: Length: 32
@@ -79,7 +108,7 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
; 32-REL-NEXT: }
; 32-REL-NEXT: Relocation {
; 32-REL-NEXT: Virtual Address: 0x40
-; 32-REL-NEXT: Symbol: .call_memcpy (5)
+; 32-REL-NEXT: Symbol: .call_memcpy (7)
; 32-REL-NEXT: IsSigned: No
; 32-REL-NEXT: FixupBitValue: 0
; 32-REL-NEXT: Length: 32
@@ -87,7 +116,7 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
; 32-REL-NEXT: }
; 32-REL-NEXT: Relocation {
; 32-REL-NEXT: Virtual Address: 0x44
-; 32-REL-NEXT: Symbol: TOC (11)
+; 32-REL-NEXT: Symbol: TOC (13)
; 32-REL-NEXT: IsSigned: No
; 32-REL-NEXT: FixupBitValue: 0
; 32-REL-NEXT: Length: 32
More information about the llvm-commits
mailing list