[llvm] f9d932e - [clang][AArch64] Correctly align HFA arguments when passed on the stack

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 15 14:58:46 PDT 2021


Author: Momchil Velikov
Date: 2021-04-15T22:58:14+01:00
New Revision: f9d932e6735afe73117e142a12443449f2197e69

URL: https://github.com/llvm/llvm-project/commit/f9d932e6735afe73117e142a12443449f2197e69
DIFF: https://github.com/llvm/llvm-project/commit/f9d932e6735afe73117e142a12443449f2197e69.diff

LOG: [clang][AArch64] Correctly align HFA arguments when passed on the stack

When we pass an AArch64 Homogeneous Floating-Point
Aggregate (HFA) argument with increased alignment
requirements, for example

    struct S {
      __attribute__ ((__aligned__(16))) double v[4];
    };

Clang uses `[4 x double]` for the parameter, which is passed
on the stack at alignment 8, whereas it should be at
alignment 16, following Rule C.4 in
AAPCS (https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#642parameter-passing-rules)

Currently we don't have a way to express in LLVM IR the
alignment requirements of the function arguments. The align
attribute is applicable to pointers only, and only for some
special ways of passing arguments (e.g. byval). When
implementing AAPCS32/AAPCS64, clang resorts to dubious hacks
of coercing to types, which naturally have the needed
alignment. We don't have enough types to cover all the
cases, though.

This patch introduces a new use of the stackalign attribute
to control stack slot alignment, when and if an argument is
passed in memory.

The attribute align is left as an optimizer hint - it still
applies to pointer types only and pertains to the content of
the pointer, whereas the alignment of the pointer itself is
determined by the stackalign attribute.

For byval arguments, the stackalign attribute assumes the
role previously performed by align, falling back to align if
stackalign is absent.

On the clang side, when passing arguments using the "direct"
style (cf. `ABIArgInfo::Kind`), now we can optionally
specify an alignment, which is emitted as the new
`stackalign` attribute.

Patch by Momchil Velikov and Lucas Prates.

Differential Revision: https://reviews.llvm.org/D98794

Added: 
    clang/test/CodeGen/aarch64-args-hfa.c
    llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll

Modified: 
    clang/include/clang/CodeGen/CGFunctionInfo.h
    clang/lib/CodeGen/CGCall.cpp
    clang/lib/CodeGen/TargetInfo.cpp
    llvm/docs/LangRef.rst
    llvm/include/llvm/CodeGen/TargetCallingConv.h
    llvm/include/llvm/IR/Argument.h
    llvm/include/llvm/IR/Attributes.h
    llvm/include/llvm/IR/Function.h
    llvm/include/llvm/IR/InstrTypes.h
    llvm/lib/AsmParser/LLParser.cpp
    llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
    llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/IR/Attributes.cpp
    llvm/lib/IR/Function.cpp
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
    llvm/test/Bitcode/compatibility.ll

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h
index 253ef946ce15d..91d867e7f64a5 100644
--- a/clang/include/clang/CodeGen/CGFunctionInfo.h
+++ b/clang/include/clang/CodeGen/CGFunctionInfo.h
@@ -94,12 +94,17 @@ class ABIArgInfo {
     llvm::Type *UnpaddedCoerceAndExpandType; // isCoerceAndExpand()
   };
   union {
-    unsigned DirectOffset;     // isDirect() || isExtend()
-    unsigned IndirectAlign;    // isIndirect()
+    struct {
+      unsigned Offset;
+      unsigned Align;
+    } DirectAttr;              // isDirect() || isExtend()
+    struct {
+      unsigned Align;
+      unsigned AddrSpace;
+    } IndirectAttr;            // isIndirect()
     unsigned AllocaFieldIndex; // isInAlloca()
   };
   Kind TheKind;
-  unsigned IndirectAddrSpace : 24; // isIndirect()
   bool PaddingInReg : 1;
   bool InAllocaSRet : 1;    // isInAlloca()
   bool InAllocaIndirect : 1;// isInAlloca()
@@ -126,19 +131,20 @@ class ABIArgInfo {
 
 public:
   ABIArgInfo(Kind K = Direct)
-      : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
-        IndirectAddrSpace(0), PaddingInReg(false), InAllocaSRet(false),
+      : TypeData(nullptr), PaddingType(nullptr), DirectAttr{0, 0}, TheKind(K),
+        PaddingInReg(false), InAllocaSRet(false),
         InAllocaIndirect(false), IndirectByVal(false), IndirectRealign(false),
         SRetAfterThis(false), InReg(false), CanBeFlattened(false),
         SignExt(false) {}
 
   static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
                               llvm::Type *Padding = nullptr,
-                              bool CanBeFlattened = true) {
+                              bool CanBeFlattened = true, unsigned Align = 0) {
     auto AI = ABIArgInfo(Direct);
     AI.setCoerceToType(T);
     AI.setPaddingType(Padding);
     AI.setDirectOffset(Offset);
+    AI.setDirectAlign(Align);
     AI.setCanBeFlattened(CanBeFlattened);
     return AI;
   }
@@ -154,6 +160,7 @@ class ABIArgInfo {
     AI.setCoerceToType(T);
     AI.setPaddingType(nullptr);
     AI.setDirectOffset(0);
+    AI.setDirectAlign(0);
     AI.setSignExt(true);
     return AI;
   }
@@ -164,6 +171,7 @@ class ABIArgInfo {
     AI.setCoerceToType(T);
     AI.setPaddingType(nullptr);
     AI.setDirectOffset(0);
+    AI.setDirectAlign(0);
     AI.setSignExt(false);
     return AI;
   }
@@ -299,11 +307,20 @@ class ABIArgInfo {
   // Direct/Extend accessors
   unsigned getDirectOffset() const {
     assert((isDirect() || isExtend()) && "Not a direct or extend kind");
-    return DirectOffset;
+    return DirectAttr.Offset;
   }
   void setDirectOffset(unsigned Offset) {
     assert((isDirect() || isExtend()) && "Not a direct or extend kind");
-    DirectOffset = Offset;
+    DirectAttr.Offset = Offset;
+  }
+
+  unsigned getDirectAlign() const {
+    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    return DirectAttr.Align;
+  }
+  void setDirectAlign(unsigned Align) {
+    assert((isDirect() || isExtend()) && "Not a direct or extend kind");
+    DirectAttr.Align = Align;
   }
 
   bool isSignExt() const {
@@ -369,11 +386,11 @@ class ABIArgInfo {
   // Indirect accessors
   CharUnits getIndirectAlign() const {
     assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
-    return CharUnits::fromQuantity(IndirectAlign);
+    return CharUnits::fromQuantity(IndirectAttr.Align);
   }
   void setIndirectAlign(CharUnits IA) {
     assert((isIndirect() || isIndirectAliased()) && "Invalid kind!");
-    IndirectAlign = IA.getQuantity();
+    IndirectAttr.Align = IA.getQuantity();
   }
 
   bool getIndirectByVal() const {
@@ -387,12 +404,12 @@ class ABIArgInfo {
 
   unsigned getIndirectAddrSpace() const {
     assert(isIndirectAliased() && "Invalid kind!");
-    return IndirectAddrSpace;
+    return IndirectAttr.AddrSpace;
   }
 
   void setIndirectAddrSpace(unsigned AddrSpace) {
     assert(isIndirectAliased() && "Invalid kind!");
-    IndirectAddrSpace = AddrSpace;
+    IndirectAttr.AddrSpace = AddrSpace;
   }
 
   bool getIndirectRealign() const {

diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 1d71148d67e67..0474ddb034283 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2363,6 +2363,7 @@ void CodeGenModule::ConstructAttributeList(
         Attrs.addAttribute(llvm::Attribute::Nest);
       else if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
+      Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign()));
       break;
 
     case ABIArgInfo::Indirect: {

diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index bd3c265378921..3e0a269462ff9 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -5690,8 +5690,19 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
   // In variadic functions on Windows, all composite types are treated alike,
   // no special handling of HFAs/HVAs.
   if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
+    if (Kind != AArch64ABIInfo::AAPCS)
+      return ABIArgInfo::getDirect(
+          llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
+
+    // For alignment adjusted HFAs, cap the argument alignment to 16, leave it
+    // default otherwise.
+    unsigned Align =
+        getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
+    unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
+    Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
     return ABIArgInfo::getDirect(
-        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
+        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
+        nullptr, true, Align);
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.

diff  --git a/clang/test/CodeGen/aarch64-args-hfa.c b/clang/test/CodeGen/aarch64-args-hfa.c
new file mode 100644
index 0000000000000..4abdc426a5ed1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-args-hfa.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple aarch64-none-eabi -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+// RUN: %clang_cc1 -triple arm64-apple-ios7.0 -target-abi darwinpcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DARWIN
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - -x c %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+
+typedef struct {
+  float v[2];
+} S0;
+
+// CHECK: define{{.*}} float @f0([2 x float] %h.coerce)
+float f0(S0 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} float @f0_call()
+// CHECK: %call = call float @f0([2 x float] %1)
+float f0_call() {
+  S0 h = {1.0f, 2.0f};
+  return f0(h);
+}
+typedef struct {
+  double v[2];
+} S1;
+
+// CHECK: define{{.*}} double @f1([2 x double] %h.coerce)
+double f1(S1 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f1_call()
+// CHECK: %call = call double @f1([2 x double] %1
+double f1_call() {
+  S1 h = {1.0, 2.0};
+  return f1(h);
+}
+typedef struct {
+  __attribute__((__aligned__(16))) double v[2];
+} S2;
+
+// CHECK-AAPCS:  define{{.*}} double @f2([2 x double] alignstack(16) %h.coerce)
+// CHECK-DARWIN: define{{.*}} double @f2([2 x double] %h.coerce)
+double f2(S2 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f2_call()
+// CHECK-AAPCS:  %call = call double @f2([2 x double] alignstack(16) %1)
+// CHECK-DARWIN: %call = call double @f2([2 x double] %1
+double f2_call() {
+  S2 h = {1.0, 2.0};
+  return f2(h);
+}
+
+typedef struct {
+  __attribute__((__aligned__(32))) double v[4];
+} S3;
+
+// CHECK-AAPCS:  define{{.*}} double @f3([4 x double] alignstack(16) %h.coerce)
+// CHECK-DARWIN: define{{.*}} double @f3([4 x double] %h.coerce)
+double f3(S3 h) {
+  return h.v[0];
+}
+
+// CHECK: define{{.*}} double @f3_call()
+// CHECK-AAPCS:  %call = call double @f3([4 x double] alignstack(16) %1)
+// CHECK-DARWIN: %call = call double @f3([4 x double] %1
+double f3_call() {
+  S3 h = {1.0, 2.0};
+  return f3(h);
+}

diff  --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 64436f22f30ea..1792bc69ff777 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1300,6 +1300,15 @@ Currently, only the following parameter attributes are defined:
     undefined. Note that this does not refer to padding introduced by the
     type's storage representation.
 
+``alignstack(<n>)``
+    This indicates the alignment that should be considered by the backend when
+    assigning this parameter to a stack slot during calling convention
+    lowering. The enforcement of the specified alignment is target-dependent,
+    as target-specific calling convention rules may override this value. This
+    attribute serves the purpose of carrying language specific alignment
+    information that is not mapped to base types in the backend (for example,
+    over-alignment specification through language attributes).
+
 .. _gc:
 
 Garbage Collector Strategy Names

diff  --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 3084f2ab210bd..88785a8f45d26 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -44,7 +44,8 @@ namespace ISD {
     unsigned IsHva : 1;        ///< HVA field for
     unsigned IsHvaStart : 1;   ///< HVA structure start
     unsigned IsSecArgPass : 1; ///< Second argument
-    unsigned ByValOrByRefAlign : 4; ///< Log 2 of byval/byref alignment
+    unsigned MemAlign : 4;     ///< Log 2 of alignment when arg is passed in memory
+                               ///< (including byval/byref)
     unsigned OrigAlign : 5;    ///< Log 2 of original alignment
     unsigned IsInConsecutiveRegsLast : 1;
     unsigned IsInConsecutiveRegs : 1;
@@ -55,18 +56,12 @@ namespace ISD {
 
     unsigned PointerAddrSpace; ///< Address space of pointer argument
 
-    /// Set the alignment used by byref or byval parameters.
-    void setAlignImpl(Align A) {
-      ByValOrByRefAlign = encode(A);
-      assert(getNonZeroByValAlign() == A && "bitfield overflow");
-    }
-
   public:
     ArgFlagsTy()
       : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsByRef(0),
           IsNest(0), IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
           IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
-          IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValOrByRefAlign(0),
+          IsHva(0), IsHvaStart(0), IsSecArgPass(0), MemAlign(0),
           OrigAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
           IsCopyElisionCandidate(0), IsPointer(0), ByValOrByRefSize(0),
           PointerAddrSpace(0) {
@@ -141,24 +136,26 @@ namespace ISD {
     bool isPointer()  const { return IsPointer; }
     void setPointer() { IsPointer = 1; }
 
-    Align getNonZeroByValAlign() const {
-      MaybeAlign A = decodeMaybeAlign(ByValOrByRefAlign);
-      assert(A && "ByValAlign must be defined");
-      return *A;
+    Align getNonZeroMemAlign() const {
+      return decodeMaybeAlign(MemAlign).valueOrOne();
     }
-    void setByValAlign(Align A) {
-      assert(isByVal() && !isByRef());
-      setAlignImpl(A);
+
+    void setMemAlign(Align A) {
+      MemAlign = encode(A);
+      assert(getNonZeroMemAlign() == A && "bitfield overflow");
     }
 
-    void setByRefAlign(Align A) {
-      assert(!isByVal() && isByRef());
-      setAlignImpl(A);
+    Align getNonZeroByValAlign() const {
+      assert(isByVal());
+      MaybeAlign A = decodeMaybeAlign(MemAlign);
+      assert(A && "ByValAlign must be defined");
+      return *A;
     }
 
     Align getNonZeroOrigAlign() const {
       return decodeMaybeAlign(OrigAlign).valueOrOne();
     }
+
     void setOrigAlign(Align A) {
       OrigAlign = encode(A);
       assert(getNonZeroOrigAlign() == A && "bitfield overflow");

diff  --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index 4b13e2d2a9e82..218ef91a5b385 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -102,6 +102,8 @@ class Argument final : public Value {
   /// If this is a byval or inalloca argument, return its alignment.
   MaybeAlign getParamAlign() const;
 
+  MaybeAlign getParamStackAlign() const;
+
   /// If this is a byval argument, return its type.
   Type *getParamByValType() const;
 

diff  --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index a8c4017118580..55f342c2c94e9 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -674,6 +674,9 @@ class AttributeList {
   /// Return the alignment for the specified function parameter.
   MaybeAlign getParamAlignment(unsigned ArgNo) const;
 
+  /// Return the stack alignment for the specified function parameter.
+  MaybeAlign getParamStackAlignment(unsigned ArgNo) const;
+
   /// Return the byval type for the specified function parameter.
   Type *getParamByValType(unsigned ArgNo) const;
 

diff  --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index a24b12c1a470a..9659e1865180f 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -483,6 +483,10 @@ class Function : public GlobalObject, public ilist_node<Function> {
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
+  MaybeAlign getParamStackAlign(unsigned ArgNo) const {
+    return AttributeSets.getParamStackAlignment(ArgNo);
+  }
+
   /// Extract the byval type for a parameter.
   Type *getParamByValType(unsigned ArgNo) const {
     return AttributeSets.getParamByValType(ArgNo);

diff  --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index f218bc4cd36fc..ea529abbab7f4 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1731,6 +1731,10 @@ class CallBase : public Instruction {
     return Attrs.getParamAlignment(ArgNo);
   }
 
+  MaybeAlign getParamStackAlign(unsigned ArgNo) const {
+    return Attrs.getParamStackAlignment(ArgNo);
+  }
+
   /// Extract the byval type for a call or parameter.
   Type *getParamByValType(unsigned ArgNo) const {
     Type *Ty = Attrs.getParamByValType(ArgNo);

diff  --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index d69ac7289fe45..db2cb66963d93 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1715,6 +1715,13 @@ bool LLParser::parseOptionalParamAttrs(AttrBuilder &B) {
       B.addAlignmentAttr(Alignment);
       continue;
     }
+    case lltok::kw_alignstack: {
+      unsigned Alignment;
+      if (parseOptionalStackAlignment(Alignment))
+        return true;
+      B.addStackAlignmentAttr(Alignment);
+      continue;
+    }
     case lltok::kw_byval: {
       Type *Ty;
       if (parseRequiredTypeAttr(Ty, lltok::kw_byval))
@@ -1783,7 +1790,6 @@ bool LLParser::parseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_zeroext:         B.addAttribute(Attribute::ZExt); break;
     case lltok::kw_immarg:          B.addAttribute(Attribute::ImmArg); break;
 
-    case lltok::kw_alignstack:
     case lltok::kw_alwaysinline:
     case lltok::kw_argmemonly:
     case lltok::kw_builtin:

diff  --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 808be0ff6381f..440020081feb3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -154,6 +154,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
   const AttributeList &Attrs = FuncInfo.getAttributes();
   addArgFlagsFromAttributes(Flags, Attrs, OpIdx);
 
+  Align MemAlign;
   if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
 
@@ -162,13 +163,18 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
 
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
-    Align FrameAlign;
-    if (auto ParamAlign = FuncInfo.getParamAlign(OpIdx - 1))
-      FrameAlign = *ParamAlign;
+    if (auto ParamAlign = FuncInfo.getParamStackAlign(OpIdx - 1))
+      MemAlign = *ParamAlign;
+    else if ((ParamAlign = FuncInfo.getParamAlign(OpIdx - 1)))
+      MemAlign = *ParamAlign;
     else
-      FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
-    Flags.setByValAlign(FrameAlign);
+      MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
+  } else if (auto ParamAlign = FuncInfo.getParamStackAlign(OpIdx - 1)) {
+    MemAlign = *ParamAlign;
+  } else {
+    MemAlign = Align(DL.getABITypeAlign(Arg.Ty));
   }
+  Flags.setMemAlign(MemAlign);
   Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
 
   // Don't try to use the returned attribute if the argument is marked as

diff  --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 50faa63be5992..79ac13cce5d6b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1072,6 +1072,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
       // preallocated handling in the various CC lowering callbacks.
       Flags.setByVal();
     }
+    MaybeAlign MemAlign = Arg.Alignment;
     if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
@@ -1080,18 +1081,18 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
 
       // For ByVal, alignment should come from FE. BE will guess if this info
       // is not there, but there are cases it cannot get right.
-      MaybeAlign FrameAlign = Arg.Alignment;
-      if (!FrameAlign)
-        FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL));
+      if (!MemAlign)
+        MemAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL));
       Flags.setByValSize(FrameSize);
-      Flags.setByValAlign(*FrameAlign);
+    } else if (!MemAlign) {
+      MemAlign = DL.getABITypeAlign(Arg.Ty);
     }
+    Flags.setMemAlign(*MemAlign);
     if (Arg.IsNest)
       Flags.setNest();
     if (NeedsRegBlock)
       Flags.setInConsecutiveRegs();
     Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
-
     CLI.OutVals.push_back(Arg.Val);
     CLI.OutFlags.push_back(Flags);
   }

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9e14e85bcefb1..6b112ee619e0c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9425,6 +9425,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       // for a type depending on the context. Give the target a chance to
       // specify the alignment it wants.
       const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
+      Flags.setOrigAlign(OriginalAlignment);
 
       if (Args[i].Ty->isPointerTy()) {
         Flags.setPointer();
@@ -9478,6 +9479,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         // in the various CC lowering callbacks.
         Flags.setByVal();
       }
+      Align MemAlign;
       if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
@@ -9487,18 +9489,20 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         Flags.setByValSize(FrameSize);
 
         // info is not there but there are cases it cannot get right.
-        Align FrameAlign;
         if (auto MA = Args[i].Alignment)
-          FrameAlign = *MA;
+          MemAlign = *MA;
         else
-          FrameAlign = Align(getByValTypeAlignment(ElementTy, DL));
-        Flags.setByValAlign(FrameAlign);
+          MemAlign = Align(getByValTypeAlignment(ElementTy, DL));
+      } else if (auto MA = Args[i].Alignment) {
+        MemAlign = *MA;
+      } else {
+        MemAlign = OriginalAlignment;
       }
+      Flags.setMemAlign(MemAlign);
       if (Args[i].IsNest)
         Flags.setNest();
       if (NeedsRegBlock)
         Flags.setInConsecutiveRegs();
-      Flags.setOrigAlign(OriginalAlignment);
 
       MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
                                                  CLI.CallConv, VT);
@@ -9960,11 +9964,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
       ISD::ArgFlagsTy Flags;
 
-      // Certain targets (such as MIPS), may have a different ABI alignment
-      // for a type depending on the context. Give the target a chance to
-      // specify the alignment it wants.
-      const Align OriginalAlignment(
-          TLI->getABIAlignmentForCallingConv(ArgTy, DL));
 
       if (Arg.getType()->isPointerTy()) {
         Flags.setPointer();
@@ -10017,6 +10016,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         Flags.setByVal();
       }
 
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      const Align OriginalAlignment(
+          TLI->getABIAlignmentForCallingConv(ArgTy, DL));
+      Flags.setOrigAlign(OriginalAlignment);
+
+      Align MemAlign;
       Type *ArgMemTy = nullptr;
       if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
           Flags.isByRef()) {
@@ -10028,24 +10035,27 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         // For in-memory arguments, size and alignment should be passed from FE.
         // BE will guess if this info is not there but there are cases it cannot
         // get right.
-        MaybeAlign MemAlign = Arg.getParamAlign();
-        if (!MemAlign)
+        if (auto ParamAlign = Arg.getParamStackAlign())
+          MemAlign = *ParamAlign;
+        else if ((ParamAlign = Arg.getParamAlign()))
+          MemAlign = *ParamAlign;
+        else
           MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
-
-        if (Flags.isByRef()) {
+        if (Flags.isByRef())
           Flags.setByRefSize(MemSize);
-          Flags.setByRefAlign(*MemAlign);
-        } else {
+        else
           Flags.setByValSize(MemSize);
-          Flags.setByValAlign(*MemAlign);
-        }
+      } else if (auto ParamAlign = Arg.getParamStackAlign()) {
+        MemAlign = *ParamAlign;
+      } else {
+        MemAlign = OriginalAlignment;
       }
+      Flags.setMemAlign(MemAlign);
 
       if (Arg.hasAttribute(Attribute::Nest))
         Flags.setNest();
       if (NeedsRegBlock)
         Flags.setInConsecutiveRegs();
-      Flags.setOrigAlign(OriginalAlignment);
       if (ArgCopyElisionCandidates.count(&Arg))
         Flags.setCopyElisionCandidate();
       if (Arg.hasAttribute(Attribute::Returned))

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4f14c7e72409e..fcb8d9b06847c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -115,10 +115,13 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
-  Alignment = Call->getParamAlign(ArgIdx);
+  Alignment = Call->getParamStackAlign(ArgIdx);
   ByValType = nullptr;
-  if (IsByVal)
+  if (IsByVal) {
     ByValType = Call->getParamByValType(ArgIdx);
+    if (!Alignment)
+      Alignment = Call->getParamAlign(ArgIdx);
+  }
   PreallocatedType = nullptr;
   if (IsPreallocated)
     PreallocatedType = Call->getParamPreallocatedType(ArgIdx);

diff  --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index c174e4f931969..a3ad7c7364db3 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -1583,6 +1583,10 @@ MaybeAlign AttributeList::getParamAlignment(unsigned ArgNo) const {
   return getAttributes(ArgNo + FirstArgIndex).getAlignment();
 }
 
+MaybeAlign AttributeList::getParamStackAlignment(unsigned ArgNo) const {
+  return getAttributes(ArgNo + FirstArgIndex).getStackAlignment();
+}
+
 Type *AttributeList::getParamByValType(unsigned Index) const {
   return getAttributes(Index+FirstArgIndex).getByValType();
 }

diff  --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 1001607403d2e..73fd32cedbdb0 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -200,6 +200,10 @@ MaybeAlign Argument::getParamAlign() const {
   return getParent()->getParamAlign(getArgNo());
 }
 
+MaybeAlign Argument::getParamStackAlign() const {
+  return getParent()->getParamStackAlign(getArgNo());
+}
+
 Type *Argument::getParamByValType() const {
   assert(getType()->isPointerTy() && "Only pointers have byval types");
   return getParent()->getParamByValType(getArgNo());

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 536f80880a2f4..cc7ab865bd270 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1648,7 +1648,6 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
   case Attribute::NoImplicitFloat:
   case Attribute::Naked:
   case Attribute::InlineHint:
-  case Attribute::StackAlignment:
   case Attribute::UWTable:
   case Attribute::VScaleRange:
   case Attribute::NonLazyBind:
@@ -1691,7 +1690,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
 static bool isFuncOrArgAttr(Attribute::AttrKind Kind) {
   return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly ||
          Kind == Attribute::ReadNone || Kind == Attribute::NoFree ||
-         Kind == Attribute::Preallocated;
+         Kind == Attribute::Preallocated || Kind == Attribute::StackAlignment;
 }
 
 void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction,
@@ -3313,7 +3312,7 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
   static const Attribute::AttrKind ABIAttrs[] = {
       Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
       Attribute::InReg,        Attribute::SwiftSelf, Attribute::SwiftError,
-      Attribute::Preallocated, Attribute::ByRef};
+      Attribute::Preallocated, Attribute::ByRef,     Attribute::StackAlignment};
   AttrBuilder Copy;
   for (auto AK : ABIAttrs) {
     if (Attrs.hasParamAttribute(I, AK))

diff  --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
index c51dd48cab34b..bfcafc6442d24 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -88,13 +88,8 @@ static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
   }
 
   unsigned Size = LocVT.getSizeInBits() / 8;
-  const Align StackAlign =
-      State.getMachineFunction().getDataLayout().getStackAlignment();
-  const Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
-  const Align Alignment = std::min(OrigAlign, StackAlign);
-
   for (auto &It : PendingMembers) {
-    It.convertToMem(State.AllocateStack(Size, std::max(Alignment, SlotAlign)));
+    It.convertToMem(State.AllocateStack(Size, SlotAlign));
     State.addLoc(It);
     SlotAlign = Align(1);
   }
@@ -197,7 +192,12 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
       State.AllocateReg(Reg);
   }
 
-  const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8);
+  const Align StackAlign =
+      State.getMachineFunction().getDataLayout().getStackAlignment();
+  const Align MemAlign = ArgFlags.getNonZeroMemAlign();
+  Align SlotAlign = std::min(MemAlign, StackAlign);
+  if (!Subtarget.isTargetDarwin())
+    SlotAlign = std::max(SlotAlign, Align(8));
 
   return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
 }

diff  --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 95bb2639e09bc..0a182202c1e7e 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -550,6 +550,8 @@ declare void @f.param.dereferenceable(i8* dereferenceable(4))
 ; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4))
 declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
 ; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
+declare void @f.param.stack_align([2 x double] alignstack(16))
+; CHECK: declare void @f.param.stack_align([2 x double] alignstack(16))
 
 ; Functions -- unnamed_addr and local_unnamed_addr
 declare void @f.unnamed_addr() unnamed_addr

diff  --git a/llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll b/llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll
new file mode 100644
index 0000000000000..a41c974b3498f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=arm64-none-eabi | FileCheck %s
+
+; Over-aligned HFA argument placed on register - one element per register
+define double @test_hfa_align_arg_reg([2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: test_hfa_align_arg_reg:
+; CHECK-NOT: mov
+; CHECK-NOT: ld
+; CHECK: ret
+  %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0
+  ret double %h.coerce.fca.0.extract
+}
+
+; Call with over-aligned HFA argument placed on register - one element per register
+define double @test_hfa_align_call_reg() local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: test_hfa_align_call_reg:
+; CHECK-DAG: fmov  d0, #1.00000000
+; CHECK-DAG: fmov  d1, #2.00000000
+; CHECK:     bl    test_hfa_align_arg_reg
+  %call = call double @test_hfa_align_arg_reg([2 x double] alignstack(16) [double 1.000000e+00, double 2.000000e+00])
+  ret double %call
+}
+
+; Over-aligned HFA argument placed on stack - stack round up to alignment
+define double @test_hfa_align_arg_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f, [2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: test_hfa_align_arg_stack:
+; CHECK:       ldr  d0, [sp, #16]
+; CHECK-NEXT:  ret
+  %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0
+  ret double %h.coerce.fca.0.extract
+}


        


More information about the llvm-commits mailing list