[clang] 1c4108a - [i386] Modify the alignment of m128/m256/__m512 vector type according i386 abi.

Wed Apr 14 01:47:44 PDT 2021

Author: Liu, Chen3
Date: 2021-04-14T16:44:54+08:00
New Revision: 1c4108ab661d43e21b1d1c804d8a403e5b0cf7d6

URL: https://github.com/llvm/llvm-project/commit/1c4108ab661d43e21b1d1c804d8a403e5b0cf7d6
DIFF: https://github.com/llvm/llvm-project/commit/1c4108ab661d43e21b1d1c804d8a403e5b0cf7d6.diff

LOG: [i386] Modify the alignment of __m128/__m256/__m512 vector type according i386 abi.

According to i386 System V ABI:

1. when __m256 are required to be passed on the stack, the stack pointer must be aligned on a 0 mod 32 byte boundary at the time of the call.
2. when __m512 are required to be passed on the stack, the stack pointer must be aligned on a 0 mod 64 byte boundary at the time of the call.

The current method of clang passing __m512 parameter are as follow:

1. when target supports avx512, passing it with 64 byte alignment;
2. when target supports avx, passing it with 32 byte alignment;
3. Otherwise, passing it with 16 byte alignment.

Passing __m256 parameter are as follow:

1. when target supports avx or avx512, passing it with 32 byte alignment;
2. Otherwise, passing it with 16 byte alignment.

This pach will passing __m128/__m256/__m512 following i386 System V ABI and
apply it to Linux only since other System V OS (e.g Darwin, PS4 and FreeBSD) don't
want to spend any effort dealing with the ramifications of ABI breaks at present.

Differential Revision: https://reviews.llvm.org/D78564

Added: 
    clang/test/CodeGen/x86_32-align-linux.c

Modified: 
    clang/lib/CodeGen/TargetInfo.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 7f4deb21d6ed..55e38741e287 100644

--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1105,6 +1105,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
+  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
 
   static bool isRegisterSize(unsigned Size) {
@@ -1167,9 +1168,9 @@ class X86_32ABIInfo : public SwiftABIInfo {
                 unsigned NumRegisterParameters, bool SoftFloatABI)
     : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
       IsRetSmallStructInRegABI(RetSmallStructInRegABI),
-      IsWin32StructABI(Win32StructABI),
-      IsSoftFloatABI(SoftFloatABI),
+      IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
       IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+      IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
       DefaultNumRegisterParameters(NumRegisterParameters) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
@@ -1594,6 +1595,14 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
   if (Align <= MinABIStackAlignInBytes)
     return 0; // Use default alignment.
 
+  if (IsLinuxABI) {
+    // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
+    // want to spend any effort dealing with the ramifications of ABI breaks.
+    //
+    // If the vector type is __m128/__m256/__m512, return the default alignment.
+    if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
+      return Align;
+  }
   // On non-Darwin, the stack type alignment is always 4.
   if (!IsDarwinVectorABI) {
     // Set explicit alignment, since we may need to realign the top.

diff  --git a/clang/test/CodeGen/x86_32-align-linux.c b/clang/test/CodeGen/x86_32-align-linux.c
new file mode 100644
index 000000000000..6e6ddd757b6f
--- /dev/null
+++ b/clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o - %s | FileCheck %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define dso_local void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define dso_local void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 31
+// CHECK-NEXT:  %2 = and i32 %1, -32
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+  __m256 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m256);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define dso_local void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 63
+// CHECK-NEXT:  %2 = and i32 %1, -64
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define dso_local void @testPastArguments
+// CHECK: call void (i32, ...) @testm128(i32 1, <4 x float> %0)
+// CHECK: call void (i32, ...) @testm256(i32 1, <8 x float> %1)
+// CHECK: call void (i32, ...) @testm512(i32 1, <16 x float> %2)
+void testPastArguments(void) {
+  __m128 a;
+  __m256 b;
+  __m512 c;
+  testm128(1, a);
+  testm256(1, b);
+  testm512(1, c);
+}


        


[clang] 1c4108a - [i386] Modify the alignment of __m128/__m256/__m512 vector type according i386 abi.

[clang] 1c4108a - [i386] Modify the alignment of m128/m256/__m512 vector type according i386 abi.