[llvm] a14b4e3 - [GlobalISel] Tail call memcpy/memmove/memset even in the presence of copies
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 20 17:04:47 PDT 2021
Author: Jon Roelofs
Date: 2021-07-20T17:04:33-07:00
New Revision: a14b4e34a4569bb000ccdd4501628e1b891bcb38
URL: https://github.com/llvm/llvm-project/commit/a14b4e34a4569bb000ccdd4501628e1b891bcb38
DIFF: https://github.com/llvm/llvm-project/commit/a14b4e34a4569bb000ccdd4501628e1b891bcb38.diff
LOG: [GlobalISel] Tail call memcpy/memmove/memset even in the presence of copies
Differential Revision: https://reviews.llvm.org/D105382
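For context, here is a minimal C++ sketch (hypothetical, not part of the patch)
of the source pattern this change enables tail-calling: the caller returns
exactly the memcpy destination, so the only instruction between the libcall and
the return is a COPY of that pointer into the return register.

    #include <cstring>

    // Hypothetical caller: memcpy returns `dst`, and a `thisreturn` memcpy
    // leaves `dst` in x0 on AArch64, so the trailing copy into the return
    // register is redundant and the call can be lowered as a tail call.
    void *copy_and_return(void *dst, const void *src, size_t n) {
      return memcpy(dst, src, n);
    }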
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index edd79697593d..72d4fbe9e376 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -480,8 +480,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
-static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
- MachineInstr &MI) {
+static bool isLibCallInTailPosition(MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
MachineBasicBlock &MBB = *MI.getParent();
const Function &F = MBB.getParent()->getFunction();
@@ -500,8 +501,47 @@ static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
return false;
- // Only tail call if the following instruction is a standard return.
+ // Only tail call if the following instruction is a standard return or if we
+ // have a `thisreturn` callee, and a sequence like:
+ //
+ // G_MEMCPY %0, %1, %2
+ // $x0 = COPY %0
+ // RET_ReallyLR implicit $x0
auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
+ if (Next != MBB.instr_end() && Next->isCopy()) {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unsupported opcode");
+ case TargetOpcode::G_BZERO:
+ return false;
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET:
+ break;
+ }
+
+ Register VReg = MI.getOperand(0).getReg();
+ if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
+ return false;
+
+ Register PReg = Next->getOperand(0).getReg();
+ if (!PReg.isPhysical())
+ return false;
+
+ auto Ret = next_nodbg(Next, MBB.instr_end());
+ if (Ret == MBB.instr_end() || !Ret->isReturn())
+ return false;
+
+ if (Ret->getNumImplicitOperands() != 1)
+ return false;
+
+ if (PReg != Ret->getOperand(0).getReg())
+ return false;
+
+ // Skip over the COPY that we just validated.
+ Next = Ret;
+ }
+
if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
return false;
@@ -607,7 +647,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
- isLibCallInTailPosition(MIRBuilder.getTII(), MI);
+ isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
@@ -623,7 +663,8 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
// isLibCallInTailPosition.
do {
MachineInstr *Next = MI.getNextNode();
- assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
+ assert(Next &&
+ (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
"Expected instr following MI to be return or debug inst?");
// We lowered a tail call, so the call is now the return from the block.
// Delete the old return.
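Restated outside the diff, here is a self-contained C++ model of the new check
(hypothetical types, not LLVM's APIs): a single COPY between the mem op and the
return is tolerated only when it forwards the destination vreg into the one
physical register that the return consumes.

    #include <vector>

    enum class Kind { MemCpy, MemMove, MemSet, BZero, Copy, Ret };

    struct Instr {
      Kind K;
      int Def = -1;            // register defined (COPY dest, mem op dst)
      int Use = -1;            // register read (COPY src, RET implicit use)
      bool DefIsPhys = false;  // physical (true) vs. virtual register
    };

    // True if the mem op at index I sits in tail position, possibly behind
    // one COPY of its destination vreg into the returned physical register.
    bool mayTailCall(const std::vector<Instr> &MBB, size_t I) {
      size_t Next = I + 1;
      if (Next < MBB.size() && MBB[Next].K == Kind::Copy) {
        if (MBB[I].K == Kind::BZero)       // bzero returns void: no thisreturn
          return false;
        if (MBB[Next].Use != MBB[I].Def)   // COPY must read the mem op's dst
          return false;
        if (!MBB[Next].DefIsPhys)          // ...into a physical register
          return false;
        size_t Ret = Next + 1;
        if (Ret >= MBB.size() || MBB[Ret].K != Kind::Ret)
          return false;
        if (MBB[Ret].Use != MBB[Next].Def) // return must consume exactly it
          return false;
        Next = Ret;                        // skip over the validated COPY
      }
      return Next < MBB.size() && MBB[Next].K == Kind::Ret;
    }

This is a sketch under simplified assumptions; the real code additionally
verifies that the destination register is virtual and that the return carries
exactly one implicit operand, as in the hunk above.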
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir
index f0526cc3d658..02470c7a61a2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir
@@ -121,8 +121,8 @@ body: |
; CHECK-LABEL: name: no_tail_call
; CHECK: liveins: $w2, $x0, $x1
- ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
@@ -133,8 +133,8 @@ body: |
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
- %0:_(p0) = COPY $x0
- %1:_(p0) = COPY $x1
+ %0:_(p0) = COPY $x1
+ %1:_(p0) = COPY $x0
%2:_(s32) = COPY $w2
%3:_(s64) = G_ZEXT %2(s32)
G_MEMCPY %0(p0), %1(p0), %3(s64), 1 :: (store unknown-size), (load unknown-size)
@@ -168,3 +168,201 @@ body: |
%3:_(s64) = G_ZEXT %2(s32)
G_MEMCPY %0(p0), %1(p0), %3(s64), 1 :: (store unknown-size), (load unknown-size)
TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp
+
+...
+---
+name: tail_with_copy_ret
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: tail_with_copy_ret
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: $x1 = COPY [[COPY1]](p0)
+ ; CHECK: $x2 = COPY [[COPY2]](s64)
+ ; CHECK: TCRETURNdi &memcpy, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1, implicit $x2
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = COPY $x2
+ G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8))
+ $x0 = COPY %0(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_tc_mismatched_copies
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: dont_tc_mismatched_copies
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: $x1 = COPY [[COPY1]](p0)
+ ; CHECK: $x2 = COPY [[COPY2]](s64)
+ ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
+ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x1 = COPY [[COPY]](p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = COPY $x2
+ G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8))
+ $x1 = COPY %0(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_tc_extra_copy
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: dont_tc_extra_copy
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: $x1 = COPY [[COPY1]](p0)
+ ; CHECK: $x2 = COPY [[COPY2]](s64)
+ ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
+ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = COPY $x2
+ G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8))
+ $x0 = COPY %0(p0)
+ $x0 = COPY %0(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_tc_mismatched_ret
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: dont_tc_mismatched_ret
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: $x1 = COPY [[COPY1]](p0)
+ ; CHECK: $x2 = COPY [[COPY2]](s64)
+ ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
+ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x1 = COPY [[COPY]](p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:_(p0) = COPY $x1
+ %1:_(p0) = COPY $x0
+ %2:_(s64) = COPY $x2
+ G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8))
+ $x1 = COPY %0(p0)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: dont_tc_ret_void_copy
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: dont_tc_ret_void_copy
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: $x1 = COPY [[COPY1]](p0)
+ ; CHECK: $x2 = COPY [[COPY2]](s64)
+ ; CHECK: BL &memcpy, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2
+ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK: $x0 = COPY [[COPY]](p0)
+ ; CHECK: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = COPY $x2
+ G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8)), (load (s8))
+ $x0 = COPY %0(p0)
+ RET_ReallyLR