[llvm] r208192 - [ARM64-BE] Make big endian (scalar) argument passing work correctly.

Wed May 7 04:28:37 PDT 2014

Author: jamesm
Date: Wed May  7 06:28:36 2014
New Revision: 208192

URL: http://llvm.org/viewvc/llvm-project?rev=208192&view=rev
Log:
[ARM64-BE] Make big endian (scalar) argument passing work correctly.

This completes the port of r204814 (cpirker "AArch64_BE function argument
passing for ARM ABI") from AArch64 to ARM64, and fixes a bunch of issues
found during later development along the way. The biggest of these was
that the alignment fixup logic wasn't replicated into all the places it
should have been.

Modified:
    llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/adc.ll
    llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll
    llvm/trunk/test/CodeGen/AArch64/func-calls.ll
    llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll
    llvm/trunk/test/CodeGen/ARM64/aapcs.ll

Modified: llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp?rev=208192&r1=208191&r2=208192&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp Wed May  7 06:28:36 2014
@@ -1678,8 +1678,10 @@ SDValue ARM64TargetLowering::LowerFormal
       int Size = Ins[i].Flags.getByValSize();
       unsigned NumRegs = (Size + 7) / 8;
 
+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should also work for fundamental types too.
       unsigned FrameIdx =
-          MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+        MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
       SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
       InVals.push_back(FrameIdxN);
 
@@ -1737,13 +1739,33 @@ SDValue ARM64TargetLowering::LowerFormal
       assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
       unsigned ArgOffset = VA.getLocMemOffset();
       unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
-      int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+
+      uint32_t BEAlign = 0;
+      if (ArgSize < 8 && !Subtarget->isLittleEndian())
+        BEAlign = 8 - ArgSize;
+
+      int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
 
       // Create load nodes to retrieve arguments from the stack.
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
-                                   MachinePointerInfo::getFixedStack(FI), false,
-                                   false, false, 0));
+      SDValue ArgValue;
+
+      // If the loc type and val type are not the same, create an anyext load.
+      if (VA.getLocVT().getSizeInBits() != VA.getValVT().getSizeInBits()) {
+        // We should only get here if this is a pure integer.
+        assert(!VA.getValVT().isVector() && VA.getValVT().isInteger() &&
+               "Only integer extension supported!");
+        ArgValue = DAG.getExtLoad(ISD::EXTLOAD, DL, VA.getValVT(), Chain, FIN,
+                                  MachinePointerInfo::getFixedStack(FI),
+                                  VA.getLocVT(),
+                                  false, false, false, 0);
+      } else {
+        ArgValue = DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
+                               MachinePointerInfo::getFixedStack(FI), false,
+                               false, false, 0);
+      }
+
+      InVals.push_back(ArgValue);
     }
   }
 
@@ -2089,8 +2111,18 @@ SDValue ARM64TargetLowering::LowerCall(C
       // There's no reason we can't support stack args w/ tailcall, but
       // we currently don't, so assert if we see one.
       assert(!IsTailCall && "stack argument with tail call!?");
+
+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should also work for fundamental types too.
+      uint32_t BEAlign = 0;
+      if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+        unsigned OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8;
+        if (OpSize < 8)
+          BEAlign = 8 - OpSize;
+      }
+
       unsigned LocMemOffset = VA.getLocMemOffset();
-      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset + BEAlign);
       PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
 
       if (Outs[i].Flags.isByVal()) {

Modified: llvm/trunk/test/CodeGen/AArch64/adc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/adc.ll?rev=208192&r1=208191&r2=208192&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/adc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/adc.ll Wed May  7 06:28:36 2014
@@ -1,6 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 
 define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
 ; CHECK-LABEL: test_simple:

Modified: llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll?rev=208192&r1=208191&r2=208192&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/func-argpassing.ll Wed May  7 06:28:36 2014
@@ -1,9 +1,12 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-AARCH64 --check-prefix=CHECK-LE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE-AARCH64 --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -152,7 +155,7 @@ define i32 @struct_on_stack(i8 %var0, i1
     %retval = load volatile i32* %stacked
     ret i32 %retval
 ; CHECK-LE: ldr w0, [sp, #16]
-; CHECK-BE: ldr w0, [sp, #20]
+; CHECK-BE-AARCH64: ldr w0, [sp, #20]
 }
 
 define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
@@ -162,8 +165,10 @@ define void @stacked_fpu(float %var0, do
     store float %var8, float* @varfloat
     ; Beware as above: the offset would be different on big-endian
     ; machines if the first ldr were changed to use s-registers.
-; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
-; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-AARCH64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-AARCH64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
 
     ret void
 }
@@ -188,7 +193,7 @@ define void @check_i128_stackalign(i32 %
     ; Nothing local on stack in current codegen, so first stack is 16 away
 ; CHECK-LE: add     x[[REG:[0-9]+]], sp, #16
 ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8]
-; CHECK-BE: ldr {{x[0-9]+}}, [sp, #24]
+; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24]
 
     ; Important point is that we address sp+24 for second dword
 ; CHECK-AARCH64: ldr     {{x[0-9]+}}, [sp, #16]
@@ -205,3 +210,14 @@ define i32 @test_extern() {
 ; CHECK: bl memcpy
   ret i32 0
 }
+
+
+; A sub-i32 stack argument must be loaded on big endian with ldr{h,b}, not just
+; implicitly extended to a 32-bit load.
+define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+                        i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+                        i16 %stack1) {
+; CHECK-LABEL: stacked_i16
+; CHECK-ARM64-BE: ldrh
+  ret i16 %stack1
+}

Modified: llvm/trunk/test/CodeGen/AArch64/func-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/func-calls.ll?rev=208192&r1=208191&r2=208192&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/func-calls.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/func-calls.ll Wed May  7 06:28:36 2014
@@ -2,9 +2,11 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s
+
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -149,9 +151,9 @@ define void @check_i128_align() {
 
   call void @check_i128_regalign(i32 0, i128 42)
 ; CHECK-NOT: mov x1
-; CHECK-LE: movz x2, #42
+; CHECK-LE: movz x2, #{{0x2a|42}}
 ; CHECK-LE: mov x3, xzr
-; CHECK-BE: movz x3, #42
+; CHECK-BE: movz {{x|w}}3, #{{0x2a|42}}
 ; CHECK-BE: mov x2, xzr
 ; CHECK: bl check_i128_regalign
 

Modified: llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll?rev=208192&r1=208191&r2=208192&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/mul-lohi.ll Wed May  7 06:28:36 2014
@@ -1,6 +1,7 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
 ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
 
 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:

Modified: llvm/trunk/test/CodeGen/ARM64/aapcs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aapcs.ll?rev=208192&r1=208191&r2=208192&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/aapcs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/aapcs.ll Wed May  7 06:28:36 2014
@@ -21,7 +21,7 @@ define void @test_stack_slots([8 x i32],
 
   %ext_bool = zext i1 %bool to i64
   store volatile i64 %ext_bool, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp]
+; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
 ; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
 ; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
 
@@ -37,7 +37,7 @@ define void @test_stack_slots([8 x i32],
 
   %ext_int = zext i32 %int to i64
   store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24]
+; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
 ; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
 
   store volatile i64 %long, i64* @var64, align 8