[llvm] b97013f - [AArch64][GlobalISel] Add support for sret demotion.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 5 15:23:52 PDT 2022


Author: Amara Emerson
Date: 2022-07-05T15:23:47-07:00
New Revision: b97013fd60977ba62a747729183e033c0900de8c

URL: https://github.com/llvm/llvm-project/commit/b97013fd60977ba62a747729183e033c0900de8c
DIFF: https://github.com/llvm/llvm-project/commit/b97013fd60977ba62a747729183e033c0900de8c.diff

LOG: [AArch64][GlobalISel] Add support for sret demotion.

To do this, we implement the canLowerReturn target hook and make a minor change to the
call lowering so that return values which the calling convention can't handle in
registers are demoted to a hidden sret argument.

Fixes issue 56295

Differential Revision: https://reviews.llvm.org/D129098
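
For readers unfamiliar with the term: sret demotion rewrites a return value that does
not fit in the return registers into a store through a hidden pointer. The caller
allocates a stack slot and passes its address to the callee (in x8 under the AArch64
calling convention), the callee writes the result through that pointer, and the caller
reloads whatever it needs after the call. A rough source-level analogue is sketched
below; the struct and function names are invented for illustration, and the real
transformation happens on generic MIR during call lowering rather than in the source
language.

  // Conceptually what the IR in the new test returns: a value too large for
  // the return registers (the test uses [9 x i64]).
  struct Big { long v[9]; };

  Big makeBig() {
    Big b{};          // zero value, like zeroinitializer in the test
    return b;
  }

  // After demotion the call behaves roughly as if it had been written like
  // this: the caller supplies the storage and the callee fills it in through
  // a hidden pointer (passed in x8 on AArch64).
  void makeBigDemoted(Big *sretOut) {
    Big b{};
    *sretOut = b;
  }

  long caller() {
    Big tmp;                // stack slot created in the caller (G_FRAME_INDEX)
    makeBigDemoted(&tmp);   // address passed as the hidden sret pointer
    return tmp.v[4];        // caller reloads the element it needs (G_LOAD)
  }

The new canLowerReturn hook is what decides whether this demotion is needed: it runs
the return calling convention over the split return values and reports whether they
can all be assigned, as in the checkReturn call in the patch below.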

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
    llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 89e1d85a6085d..aaef363e9b8dc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/ObjCARCUtil.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -354,7 +355,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
          "Return value without a vreg");
 
   bool Success = true;
-  if (!VRegs.empty()) {
+  if (!FLI.CanLowerReturn) {
+    insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
+  } else if (!VRegs.empty()) {
     MachineFunction &MF = MIRBuilder.getMF();
     const Function &F = MF.getFunction();
     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -464,6 +467,18 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
   return Success;
 }
 
+bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
+                                         CallingConv::ID CallConv,
+                                         SmallVectorImpl<BaseArgInfo> &Outs,
+                                         bool IsVarArg) const {
+  SmallVector<CCValAssign, 16> ArgLocs;
+  const auto &TLI = *getTLI<AArch64TargetLowering>();
+  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
+                 MF.getFunction().getContext());
+
+  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
+}
+
 /// Helper function to compute forwarded registers for musttail calls. Computes
 /// the forwarded registers, sets MBB liveness, and emits COPY instructions that
 /// can be used to save + restore registers later.
@@ -533,6 +548,12 @@ bool AArch64CallLowering::lowerFormalArguments(
 
   SmallVector<ArgInfo, 8> SplitArgs;
   SmallVector<std::pair<Register, Register>> BoolArgs;
+
+  // Insert the hidden sret parameter if the return value won't fit in the
+  // return registers.
+  if (!FLI.CanLowerReturn)
+    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
+
   unsigned i = 0;
   for (auto &Arg : F.args()) {
     if (DL.getTypeStoreSize(Arg.getType()).isZero())
@@ -1194,7 +1215,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // Finally we can copy the returned value back into its virtual-register. In
   // symmetry with the arguments, the physical register must be an
   // implicit-define of the call instruction.
-  if (!Info.OrigRet.Ty->isVoidTy()) {
+  if (Info.CanLowerReturn  && !Info.OrigRet.Ty->isVoidTy()) {
     CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
     CallReturnHandler Handler(MIRBuilder, MRI, MIB);
     bool UsingReturnedArg =
@@ -1226,6 +1247,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       .addImm(Assigner.StackOffset)
       .addImm(CalleePopBytes);
 
+  if (!Info.CanLowerReturn) {
+    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
+                    Info.DemoteRegister, Info.DemoteStackIndex);
+  }
   return true;
 }
 

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
index aafb1d19640a8..cbdf77f69a637 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -35,6 +35,10 @@ class AArch64CallLowering: public CallLowering {
                    ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
                    Register SwiftErrorVReg) const override;
 
+  bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+                      SmallVectorImpl<BaseArgInfo> &Outs,
+                      bool IsVarArg) const override;
+
   bool fallBackToDAGISel(const MachineFunction &MF) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll
new file mode 100644
index 0000000000000..a8520af8b0ae3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-sret-demotion.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=aarch64 -global-isel -stop-after=irtranslator -verify-machineinstrs -o - %s | FileCheck %s
+
+
+define [9 x i64] @callee_sret_demotion() {
+  ; CHECK-LABEL: name: callee_sret_demotion
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x8
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD1]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD2]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD3]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD4]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD5]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD6]](p0) :: (store (s64))
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+  ; CHECK-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s64), [[PTR_ADD7]](p0) :: (store (s64))
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret [9 x i64] zeroinitializer
+}
+
+define i64 @caller() {
+  ; CHECK-LABEL: name: caller
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $x8 = COPY [[FRAME_INDEX]](p0)
+  ; CHECK-NEXT:   BL @callee_sret_demotion, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s64)
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s64)
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s64)
+  ; CHECK-NEXT:   [[LOAD6:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD5]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s64)
+  ; CHECK-NEXT:   [[LOAD7:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+  ; CHECK-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s64)
+  ; CHECK-NEXT:   [[LOAD8:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD7]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   $x0 = COPY [[LOAD4]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  %res = call [9 x i64] @callee_sret_demotion()
+  %val = extractvalue [9 x i64] %res, 4
+  ret i64 %val
+}
+
+define i64 @caller_tail() {
+  ; CHECK-LABEL: name: caller_tail
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $x8 = COPY [[FRAME_INDEX]](p0)
+  ; CHECK-NEXT:   BL @callee_sret_demotion, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s64)
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s64)
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s64)
+  ; CHECK-NEXT:   [[LOAD6:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD5]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s64)
+  ; CHECK-NEXT:   [[LOAD7:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+  ; CHECK-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s64)
+  ; CHECK-NEXT:   [[LOAD8:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD7]](p0) :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   $x0 = COPY [[LOAD4]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  %res = tail call [9 x i64] @callee_sret_demotion()
+  %val = extractvalue [9 x i64] %res, 4
+  ret i64 %val
+}
