[llvm] 181279f - [X86][GlobalISel] Add support for sret demotion
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 19 22:31:06 PDT 2022
Author: Serge Pavlov
Date: 2022-09-20T11:47:53+07:00
New Revision: 181279ffcde14fb4486de5350ba784ac9ceec338
URL: https://github.com/llvm/llvm-project/commit/181279ffcde14fb4486de5350ba784ac9ceec338
DIFF: https://github.com/llvm/llvm-project/commit/181279ffcde14fb4486de5350ba784ac9ceec338.diff
LOG: [X86][GlobalISel] Add support for sret demotion
The change adds support for the cases where the return value is passed in
memory rather than in registers.
Differential Revision: https://reviews.llvm.org/D134181
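
For reference, a minimal IR sketch of what sret demotion amounts to (not part
of this patch; the function names are illustrative): a return value that does
not fit in return registers is instead returned through a hidden pointer
argument, and on X86 the callee hands that pointer back in EAX/RAX, as the
lowerReturn change below does:

    ; original function
    define <32 x float> @ret_big() {
      ret <32 x float> zeroinitializer
    }

    ; demoted form that the lowering effectively produces
    define void @ret_big.demoted(ptr sret(<32 x float>) %out) {
      store <32 x float> zeroinitializer, ptr %out
      ret void
    }

The new canLowerReturn override asks RetCC_X86 whether the return value fits
in registers; when it does not, lowerReturn, lowerFormalArguments and
lowerCall fall back to the insertSRetStores / insertSRetIncomingArgument /
insertSRetLoads helpers from the common CallLowering code, as seen in the
diff below.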
Added:
Modified:
llvm/lib/Target/X86/X86CallLowering.cpp
llvm/lib/Target/X86/X86CallLowering.h
llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index a14ce82313cb2..fd5163028104b 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -129,15 +130,29 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
} // end anonymous namespace
+bool X86CallLowering::canLowerReturn(
+ MachineFunction &MF, CallingConv::ID CallConv,
+ SmallVectorImpl<CallLowering::BaseArgInfo> &Outs, bool IsVarArg) const {
+ LLVMContext &Context = MF.getFunction().getContext();
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+ return checkReturn(CCInfo, Outs, RetCC_X86);
+}
+
bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, ArrayRef<Register> VRegs,
FunctionLoweringInfo &FLI) const {
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
"Return value without a vreg");
+ MachineFunction &MF = MIRBuilder.getMF();
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ bool Is64Bit = STI.is64Bit();
- if (!VRegs.empty()) {
- MachineFunction &MF = MIRBuilder.getMF();
+ if (!FLI.CanLowerReturn) {
+ insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
+ MIRBuilder.buildCopy(Is64Bit ? X86::RAX : X86::EAX, FLI.DemoteRegister);
+ } else if (!VRegs.empty()) {
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const DataLayout &DL = MF.getDataLayout();
@@ -238,18 +253,19 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<ArrayRef<Register>> VRegs,
FunctionLoweringInfo &FLI) const {
- if (F.arg_empty())
- return true;
-
- // TODO: handle variadic function
- if (F.isVarArg())
- return false;
-
MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto DL = MF.getDataLayout();
SmallVector<ArgInfo, 8> SplitArgs;
+
+ if (!FLI.CanLowerReturn)
+ insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
+
+ // TODO: handle variadic function
+ if (F.isVarArg())
+ return false;
+
unsigned Idx = 0;
for (const auto &Arg : F.args()) {
// TODO: handle not simple cases.
@@ -267,6 +283,9 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
Idx++;
}
+ if (SplitArgs.empty())
+ return true;
+
MachineBasicBlock &MBB = MIRBuilder.getMBB();
if (!MBB.empty())
MIRBuilder.setInstr(*MBB.begin());
@@ -363,7 +382,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
- if (!Info.OrigRet.Ty->isVoidTy()) {
+ if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
if (Info.OrigRet.Regs.size() > 1)
return false;
@@ -391,5 +410,9 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
.addImm(Assigner.getStackSize())
.addImm(0 /* NumBytesForCalleeToPop */);
+ if (!Info.CanLowerReturn)
+ insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
+ Info.DemoteRegister, Info.DemoteStackIndex);
+
return true;
}
diff --git a/llvm/lib/Target/X86/X86CallLowering.h b/llvm/lib/Target/X86/X86CallLowering.h
index 0ad67cfd35326..9067abf060bd9 100644
--- a/llvm/lib/Target/X86/X86CallLowering.h
+++ b/llvm/lib/Target/X86/X86CallLowering.h
@@ -36,6 +36,10 @@ class X86CallLowering : public CallLowering {
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
+
+ bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ bool IsVarArg) const override;
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index e9e57f396f641..d1a7339db9af5 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -716,3 +716,60 @@ define void @test_variadic_call_2(ptr %addr_ptr, ptr %val_ptr) {
call void (ptr, ...) @variadic_callee(ptr %addr, double %val)
ret void
}
+
+; Return value is in memory unless subtarget is AVX or higher.
+define <32 x float> @test_return_v32f32() {
+ ; X86-LABEL: name: test_return_v32f32
+ ; X86: bb.1 (%ir-block.0):
+ ; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; X86-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
+ ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; X86-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; X86-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[LOAD]](p0) :: (store (<32 x s32>))
+ ; X86-NEXT: $eax = COPY [[LOAD]](p0)
+ ; X86-NEXT: RET 0
+ ; X64-LABEL: name: test_return_v32f32
+ ; X64: bb.1 (%ir-block.0):
+ ; X64-NEXT: liveins: $rdi
+ ; X64-NEXT: {{ $}}
+ ; X64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+ ; X64-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+ ; X64-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; X64-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[COPY]](p0) :: (store (<32 x s32>))
+ ; X64-NEXT: $rax = COPY [[COPY]](p0)
+ ; X64-NEXT: RET 0
+ ret <32 x float> zeroinitializer
+}
+
+define float @test_call_v32f32() {
+ ; X86-LABEL: name: test_call_v32f32
+ ; X86: bb.1 (%ir-block.0):
+ ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+ ; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; X86-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+ ; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+ ; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
+ ; X86-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+ ; X86-NEXT: CALLpcrel32 @test_return_v32f32, csr_32, implicit $esp, implicit $ssp
+ ; X86-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+ ; X86-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
+ ; X86-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s32)
+ ; X86-NEXT: $fp0 = COPY [[EVEC]](s32)
+ ; X86-NEXT: RET 0, implicit $fp0
+ ; X64-LABEL: name: test_call_v32f32
+ ; X64: bb.1 (%ir-block.0):
+ ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; X64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+ ; X64-NEXT: $rdi = COPY [[FRAME_INDEX]](p0)
+ ; X64-NEXT: CALL64pcrel32 @test_return_v32f32, csr_64, implicit $rsp, implicit $ssp, implicit $rdi
+ ; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+ ; X64-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
+ ; X64-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s64)
+ ; X64-NEXT: $xmm0 = COPY [[EVEC]](s32)
+ ; X64-NEXT: RET 0, implicit $xmm0
+ %vect = call <32 x float> @test_return_v32f32()
+ %elt = extractelement <32 x float> %vect, i32 7
+ ret float %elt
+}