[llvm] r214846 - [FastISel][AArch64] Implement the FastLowerArguments hook.

Mon Aug 4 22:43:49 PDT 2014

Author: ributzka
Date: Tue Aug  5 00:43:48 2014
New Revision: 214846

URL: http://llvm.org/viewvc/llvm-project?rev=214846&view=rev
Log:
[FastISel][AArch64] Implement the FastLowerArguments hook.

This implements basic argument lowering for AArch64 in FastISel. It only
handles a small subset of the C calling convention. It supports simple
arguments that can be passed in GPR and FPR registers.

This should cover most of the trivial cases without falling back to
SelectionDAG.

This fixes <rdar://problem/17890986>.

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=214846&r1=214845&r2=214846&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Tue Aug  5 00:43:48 2014
@@ -94,6 +94,7 @@ class AArch64FastISel : public FastISel
   const AArch64Subtarget *Subtarget;
   LLVMContext *Context;
 
+  bool FastLowerArguments() override;
   bool FastLowerCall(CallLoweringInfo &CLI) override;
   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
 
@@ -1313,6 +1314,108 @@ bool AArch64FastISel::SelectIntToFP(cons
   return true;
 }
 
+bool AArch64FastISel::FastLowerArguments() { // true iff every arg was lowered
+  if (!FuncInfo.CanLowerReturn)
+    return false;
+
+  const Function *F = FuncInfo.Fn;
+  if (F->isVarArg())
+    return false;
+
+  CallingConv::ID CC = F->getCallingConv();
+  if (CC != CallingConv::C)
+    return false;
+
+  // Only handle simple cases (i1/i8/i16/i32/i64 and f16/f32/f64) that fit in
+  // at most 8 GPRs and 8 FPRs; give up (return false) on anything else.
+  unsigned GPRCnt = 0;
+  unsigned FPRCnt = 0;
+  unsigned Idx = 0;
+  for (auto const &Arg : F->args()) {
+    // Pre-increment: the first argument is at attribute index 1.
+    ++Idx;
+    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+      return false;
+
+    Type *ArgTy = Arg.getType();
+    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+      return false;
+
+    EVT ArgVT = TLI.getValueType(ArgTy);
+    if (!ArgVT.isSimple()) return false;
+    switch (ArgVT.getSimpleVT().SimpleTy) {
+    default: return false;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+    case MVT::i64:
+      ++GPRCnt;
+      break;
+    case MVT::f16:
+    case MVT::f32:
+    case MVT::f64:
+      ++FPRCnt;
+      break;
+    }
+
+    if (GPRCnt > 8 || FPRCnt > 8)
+      return false;
+  }
+
+  static const MCPhysReg Registers[5][8] = { // rows: W, X, H, S, D
+    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
+      AArch64::W5, AArch64::W6, AArch64::W7 },
+    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
+      AArch64::X5, AArch64::X6, AArch64::X7 },
+    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
+      AArch64::H5, AArch64::H6, AArch64::H7 },
+    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
+      AArch64::S5, AArch64::S6, AArch64::S7 },
+    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
+      AArch64::D5, AArch64::D6, AArch64::D7 }
+  };
+
+  unsigned GPRIdx = 0;
+  unsigned FPRIdx = 0;
+  for (auto const &Arg : F->args()) {
+    MVT VT = TLI.getSimpleValueType(Arg.getType());
+    unsigned SrcReg;
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type.");
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16: VT = MVT::i32; // use the i32 reg class; fall-through
+    case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
+    case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
+    case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
+    case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
+    case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
+    }
+
+    // Skip unused arguments; the register index above was still advanced.
+    if (Arg.use_empty()) {
+      UpdateValueMap(&Arg, 0);
+      continue;
+    }
+
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+    // Without this, EmitLiveInCopies may eliminate the livein if its only
+    // use is a bitcast (which isn't turned into an instruction).
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg)
+      .addReg(DstReg, getKillRegState(true));
+    UpdateValueMap(&Arg, ResultReg);
+  }
+  return true;
+}
+
 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
                                       SmallVectorImpl<MVT> &OutVTs,
                                       unsigned &NumBytes) {

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll?rev=214846&r1=214845&r2=214846&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-fast-isel-call.ll Tue Aug  5 00:43:48 2014
@@ -1,6 +1,6 @@
-; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=arm64-apple-darwin   < %s | FileCheck %s
-; RUN: llc -O0 -fast-isel-abort -code-model=large -mtriple=arm64-apple-darwin   < %s | FileCheck %s --check-prefix=LARGE
-; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=arm64-apple-darwin   < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=large -mtriple=arm64-apple-darwin   < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
 
 define void @call0() nounwind {
 entry:
@@ -105,3 +105,149 @@ entry:
   ret void
 }
 
+define zeroext i1 @call_arguments1(i1 %a1, i1 %a2, i1 %a3, i1 %a4, i1 %a5, i1 %a6, i1 %a7, i1 %a8) {
+; CHECK-LABEL: call_arguments1
+; CHECK:       and {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  and {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  and {{w[0-9]+}}, w6, w7
+  %1 = and i1 %a1, %a2
+  %2 = and i1 %a3, %a4
+  %3 = and i1 %a5, %a6
+  %4 = and i1 %a7, %a8
+  %5 = and i1 %1, %2
+  %6 = and i1 %3, %4
+  %7 = and i1 %5, %6
+  ret i1 %7
+}
+
+define i32 @call_arguments2(i8 zeroext %a1, i8 zeroext %a2, i8 zeroext %a3, i8 zeroext %a4, i8 signext %a5, i8 signext %a6, i8 signext %a7, i8 signext %a8) {
+; CHECK-LABEL: call_arguments2
+; CHECK:       add {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  add {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  add {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  add {{w[0-9]+}}, w6, w7
+  %a1z = zext i8 %a1 to i32
+  %a2z = zext i8 %a2 to i32
+  %a3z = zext i8 %a3 to i32
+  %a4z = zext i8 %a4 to i32
+  %a5s = sext i8 %a5 to i32
+  %a6s = sext i8 %a6 to i32
+  %a7s = sext i8 %a7 to i32
+  %a8s = sext i8 %a8 to i32
+  %1 = add i32 %a1z, %a2z
+  %2 = add i32 %a3z, %a4z
+  %3 = add i32 %a5s, %a6s
+  %4 = add i32 %a7s, %a8s
+  %5 = add i32 %1, %2
+  %6 = add i32 %3, %4
+  %7 = add i32 %5, %6
+  ret i32 %7
+}
+
+define i32 @call_arguments3(i16 zeroext %a1, i16 zeroext %a2, i16 zeroext %a3, i16 zeroext %a4, i16 signext %a5, i16 signext %a6, i16 signext %a7, i16 signext %a8) {
+; CHECK-LABEL: call_arguments3
+; CHECK:       add {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  add {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  add {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  add {{w[0-9]+}}, w6, w7
+  %a1z = zext i16 %a1 to i32
+  %a2z = zext i16 %a2 to i32
+  %a3z = zext i16 %a3 to i32
+  %a4z = zext i16 %a4 to i32
+  %a5s = sext i16 %a5 to i32
+  %a6s = sext i16 %a6 to i32
+  %a7s = sext i16 %a7 to i32
+  %a8s = sext i16 %a8 to i32
+  %1 = add i32 %a1z, %a2z
+  %2 = add i32 %a3z, %a4z
+  %3 = add i32 %a5s, %a6s
+  %4 = add i32 %a7s, %a8s
+  %5 = add i32 %1, %2
+  %6 = add i32 %3, %4
+  %7 = add i32 %5, %6
+  ret i32 %7
+}
+
+define i32 @call_arguments4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
+; CHECK-LABEL: call_arguments4
+; CHECK:       add {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  add {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  add {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  add {{w[0-9]+}}, w6, w7
+  %1 = add i32 %a1, %a2
+  %2 = add i32 %a3, %a4
+  %3 = add i32 %a5, %a6
+  %4 = add i32 %a7, %a8
+  %5 = add i32 %1, %2
+  %6 = add i32 %3, %4
+  %7 = add i32 %5, %6
+  ret i32 %7
+}
+
+define i64 @call_arguments5(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8) {
+; CHECK-LABEL: call_arguments5
+; CHECK:       add {{x[0-9]+}}, x0, x1
+; CHECK-NEXT:  add {{x[0-9]+}}, x2, x3
+; CHECK-NEXT:  add {{x[0-9]+}}, x4, x5
+; CHECK-NEXT:  add {{x[0-9]+}}, x6, x7
+  %1 = add i64 %a1, %a2
+  %2 = add i64 %a3, %a4
+  %3 = add i64 %a5, %a6
+  %4 = add i64 %a7, %a8
+  %5 = add i64 %1, %2
+  %6 = add i64 %3, %4
+  %7 = add i64 %5, %6
+  ret i64 %7
+}
+
+define float @call_arguments6(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8) {
+; CHECK-LABEL: call_arguments6
+; CHECK:       fadd {{s[0-9]+}}, s0, s1
+; CHECK-NEXT:  fadd {{s[0-9]+}}, s2, s3
+; CHECK-NEXT:  fadd {{s[0-9]+}}, s4, s5
+; CHECK-NEXT:  fadd {{s[0-9]+}}, s6, s7
+  %1 = fadd float %a1, %a2
+  %2 = fadd float %a3, %a4
+  %3 = fadd float %a5, %a6
+  %4 = fadd float %a7, %a8
+  %5 = fadd float %1, %2
+  %6 = fadd float %3, %4
+  %7 = fadd float %5, %6
+  ret float %7
+}
+
+define double @call_arguments7(double %a1, double %a2, double %a3, double %a4, double %a5, double %a6, double %a7, double %a8) {
+; CHECK-LABEL: call_arguments7
+; CHECK:       fadd {{d[0-9]+}}, d0, d1
+; CHECK-NEXT:  fadd {{d[0-9]+}}, d2, d3
+; CHECK-NEXT:  fadd {{d[0-9]+}}, d4, d5
+; CHECK-NEXT:  fadd {{d[0-9]+}}, d6, d7
+  %1 = fadd double %a1, %a2
+  %2 = fadd double %a3, %a4
+  %3 = fadd double %a5, %a6
+  %4 = fadd double %a7, %a8
+  %5 = fadd double %1, %2
+  %6 = fadd double %3, %4
+  %7 = fadd double %5, %6
+  ret double %7
+}
+
+define i64 @call_arguments8(i32 %a1, i64 %a2, i32 %a3, i64 %a4) {
+; CHECK-LABEL: call_arguments8
+; CHECK:       ubfx  [[REG1:x[0-9]+]], {{x[0-9]+}}, #0, #32
+; CHECK:       ubfx  [[REG2:x[0-9]+]], {{x[0-9]+}}, #0, #32
+; CHECK:       add {{x[0-9]+}}, [[REG1]], x1
+; CHECK-NEXT:  add {{x[0-9]+}}, [[REG2]], x3
+  %aa1 = zext i32 %a1 to i64
+  %aa3 = zext i32 %a3 to i64
+  %1 = add i64 %aa1, %a2
+  %2 = add i64 %aa3, %a4
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+
+define void @call_arguments9(i8 %a1, i16 %a2, i32 %a3, i64 %a4, float %a5, double %a6, i64 %a7, double %a8) {
+; CHECK-LABEL: call_arguments9
+  ret void
+}