[clang] [X86] Return illegal vectors in memory (PR #121944)

Pranav Kant via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 7 06:48:26 PST 2025


https://github.com/pranavk updated https://github.com/llvm/llvm-project/pull/121944

>From 2347ae937659988e54bc6b9f47b6edb0fdaa8c13 Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Tue, 7 Jan 2025 14:48:00 +0000
Subject: [PATCH] [X86] Return illegal vectors in memory

When vector size doesn't fit in native machine vector size, we should
return vector via a hidden reference.
---
 clang/include/clang/Basic/LangOptions.h     |  2 ++
 clang/lib/CodeGen/Targets/X86.cpp           | 38 +++++++++++++++++++--
 clang/test/CodeGen/X86/x86-illegal-vector.c | 22 ++++++++++++
 clang/test/CodeGen/X86/x86-vec-i128.c       | 38 +++++++++++----------
 4 files changed, 79 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/x86-illegal-vector.c

diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 949c8f5d448bcf..7dd8251c9da5c2 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -245,6 +245,8 @@ class LangOptionsBase {
     ///   construction vtable because it hasn't added 'type' as a substitution.
     ///   - Skip mangling enclosing class templates of member-like friend
     ///   function templates.
+    ///   - Incorrectly return illegal vectors (size greater than native
+    ///     vector size) to be returned in illegal registers on x86_64.
     Ver19,
 
     /// Conform to the underlying platform's C and C++ ABIs as closely
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 7f73bf2a65266e..a40dbf9160ba6e 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -1298,8 +1298,12 @@ class X86_64ABIInfo : public ABIInfo {
                                            unsigned &NeededSSE,
                                            unsigned &MaxVectorWidth) const;
 
+  // Checks whether vector types for function arguments are illegal
   bool IsIllegalVectorType(QualType Ty) const;
 
+  // Checks whether vector types for returns are illegal
+  bool IsIllegalReturnVectorType(QualType Ty) const;
+
   /// The 0.98 ABI revision clarified a lot of ambiguities,
   /// unfortunately in ways that were not always consistent with
   /// certain previous compilers.  In particular, platforms which
@@ -1334,6 +1338,16 @@ class X86_64ABIInfo : public ABIInfo {
     return T.isOSLinux() || T.isOSNetBSD();
   }
 
+  bool returnIllegalVectorsInMem() const {
+    // Clang <= 19.0 did not do this.
+    if (getContext().getLangOpts().getClangABICompat() <=
+        LangOptions::ClangABI::Ver19)
+      return false;
+
+    const llvm::Triple &T = getTarget().getTriple();
+    return T.isOSLinux() || T.isOSNetBSD();
+  }
+
   X86AVXABILevel AVXLevel;
   // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
   // 64-bit hardware.
@@ -2156,9 +2170,12 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
 }
 
 ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
+  const bool returnIllegalVectorsIndirectly =
+      (returnIllegalVectorsInMem() && IsIllegalReturnVectorType(Ty));
+
   // If this is a scalar LLVM value then assume LLVM will pass it in the right
   // place naturally.
-  if (!isAggregateTypeForABI(Ty)) {
+  if (!isAggregateTypeForABI(Ty) && !returnIllegalVectorsIndirectly) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
@@ -2173,12 +2190,23 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
   return getNaturalAlignIndirect(Ty);
 }
 
-bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
+static bool IsIllegalVector(QualType Ty, uint64_t Size,
+                            X86AVXABILevel AVXLevel) {
   if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
-    uint64_t Size = getContext().getTypeSize(VecTy);
     unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
     if (Size <= 64 || Size > LargestVector)
       return true;
+  }
+
+  return false;
+}
+
+bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
+  if (IsIllegalVector(Ty, getContext().getTypeSize(Ty), AVXLevel))
+    return true;
+
+  // Maintain backward compatibility
+  if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
     QualType EltTy = VecTy->getElementType();
     if (passInt128VectorsInMem() &&
         (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
@@ -2189,6 +2217,10 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
   return false;
 }
 
+bool X86_64ABIInfo::IsIllegalReturnVectorType(QualType Ty) const {
+  return IsIllegalVector(Ty, getContext().getTypeSize(Ty), AVXLevel);
+}
+
 ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
                                             unsigned freeIntRegs) const {
   // If this is a scalar LLVM value then assume LLVM will pass it in the right
diff --git a/clang/test/CodeGen/X86/x86-illegal-vector.c b/clang/test/CodeGen/X86/x86-illegal-vector.c
new file mode 100644
index 00000000000000..73fef26539fca8
--- /dev/null
+++ b/clang/test/CodeGen/X86/x86-illegal-vector.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=REGRET128,MEMRET256,MEMRET512
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -emit-llvm -o - -fclang-abi-compat=19 | FileCheck %s --check-prefixes=REGRET128,REGRET256,REGRET512
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=REGRET128,REGRET256,MEMRET512
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -emit-llvm -o - -fclang-abi-compat=19 | FileCheck %s --check-prefixes=REGRET128,REGRET256,REGRET512
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes=REGRET128,REGRET256,REGRET512
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -emit-llvm -o - -fclang-abi-compat=19 | FileCheck %s --check-prefixes=REGRET128,REGRET256,REGRET512
+
+#define __MM_MALLOC_H
+#include <x86intrin.h>
+
+// REGRET128: define{{.*}} <4 x float> @get2()
+__m128 get2() { __m128 r = (__m128){5,6}; return r; }
+
+// MEMRET256: define{{.*}} void @get4(ptr{{.*}} sret(<8 x float>) align 32 %{{.*}})
+// REGRET256: define{{.*}} <8 x float> @get4()
+__m256 get4() { __m256 r = (__m256){7,8,9,10}; return r; }
+
+// MEMRET512: define{{.*}} void @get8(ptr{{.*}} sret(<16 x float>) align 64 %{{.*}})
+// REGRET512: define{{.*}} <16 x float> @get8()
+__m512 get8() { __m512 r = (__m512){7,8,9,10,1,2,3,4}; return r; }
\ No newline at end of file
diff --git a/clang/test/CodeGen/X86/x86-vec-i128.c b/clang/test/CodeGen/X86/x86-vec-i128.c
index ee58cb92da6b10..bdcfaeeea27784 100644
--- a/clang/test/CodeGen/X86/x86-vec-i128.c
+++ b/clang/test/CodeGen/X86/x86-vec-i128.c
@@ -1,16 +1,16 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,MEM256ALIGN32,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,MEM256ALIGN32,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN16,MEM512ALIGN16
-// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
-
-// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN32
-// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
-// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,MEMRETMEMARG256ALIGN32,MEMRETMEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,MEMRETMEMARG256ALIGN32,MEMRETMEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEMARG256ALIGN16,MEMARG512ALIGN16
+// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEMARG256ALIGN32,MEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEMARG256ALIGN32,MEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,MEMARG256ALIGN32,MEMARG512ALIGN64
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEMRETMEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEMRETMEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEMARG512ALIGN32
+// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEMARG512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEMARG512ALIGN64
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
 // RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
@@ -32,8 +32,9 @@ typedef unsigned long long v32u64 __attribute__((vector_size(32)));
 typedef unsigned __int128 v32u128 __attribute__((vector_size(32)));
 
 v32u64 test_v32u128(v32u64 a, v32u128 b) {
-// MEM256ALIGN16: define{{.*}} <4 x i64> @test_v32u128(ptr noundef byval(<4 x i64>) align 16 %{{.*}}, ptr noundef byval(<2 x i128>) align 16 %{{.*}})
-// MEM256ALIGN32: define{{.*}} <4 x i64> @test_v32u128(ptr noundef byval(<4 x i64>) align 32 %{{.*}}, ptr noundef byval(<2 x i128>) align 32 %{{.*}})
+// MEMARG256ALIGN16: define{{.*}} <4 x i64> @test_v32u128(ptr noundef byval(<4 x i64>) align 16 %{{.*}}, ptr noundef byval(<2 x i128>) align 16 %{{.*}})
+// MEMARG256ALIGN32: define{{.*}} <4 x i64> @test_v32u128(ptr noundef byval(<4 x i64>) align 32 %{{.*}}, ptr noundef byval(<2 x i128>) align 32 %{{.*}})
+// MEMRETMEMARG256ALIGN32: define{{.*}} void @test_v32u128(ptr{{.*}} sret(<4 x i64>) align 32 %{{.*}}, ptr noundef byval(<4 x i64>) align 32 %{{.*}}, ptr noundef byval(<2 x i128>) align 32 %{{.*}})
 // CLANG10ABI256: define{{.*}} <4 x i64> @test_v32u128(<4 x i64> noundef %{{.*}}, ptr noundef byval(<2 x i128>) align 32 %{{.*}})
 // CLANG9ABI256: define{{.*}} <4 x i64> @test_v32u128(<4 x i64> noundef %{{.*}}, <2 x i128> noundef %{{.*}})
   return a + (v32u64)b;
@@ -43,9 +44,10 @@ typedef unsigned long long v64u64 __attribute__((vector_size(64)));
 typedef unsigned __int128 v64u128 __attribute__((vector_size(64)));
 
 v64u64 test_v64u128(v64u64 a, v64u128 b) {
-// MEM512ALIGN16: define{{.*}} <8 x i64> @test_v64u128(ptr noundef byval(<8 x i64>) align 16 %{{.*}}, ptr noundef byval(<4 x i128>) align 16 %{{.*}})
-// MEM512ALIGN32: define{{.*}} <8 x i64> @test_v64u128(ptr noundef byval(<8 x i64>) align 32 %{{.*}}, ptr noundef byval(<4 x i128>) align 32 %{{.*}})
-// MEM512ALIGN64: define{{.*}} <8 x i64> @test_v64u128(ptr noundef byval(<8 x i64>) align 64 %{{.*}}, ptr noundef byval(<4 x i128>) align 64 %{{.*}})
+// MEMARG512ALIGN16: define{{.*}} <8 x i64> @test_v64u128(ptr noundef byval(<8 x i64>) align 16 %{{.*}}, ptr noundef byval(<4 x i128>) align 16 %{{.*}})
+// MEMARG512ALIGN32: define{{.*}} <8 x i64> @test_v64u128(ptr noundef byval(<8 x i64>) align 32 %{{.*}}, ptr noundef byval(<4 x i128>) align 32 %{{.*}})
+// MEMARG512ALIGN64: define{{.*}} <8 x i64> @test_v64u128(ptr noundef byval(<8 x i64>) align 64 %{{.*}}, ptr noundef byval(<4 x i128>) align 64 %{{.*}})
+// MEMRETMEMARG512ALIGN64: define{{.*}} void @test_v64u128(ptr{{.*}} sret(<8 x i64>) align 64 %{{.*}}, ptr noundef byval(<8 x i64>) align 64 %{{.*}}, ptr noundef byval(<4 x i128>) align 64 %{{.*}})
 // CLANG10ABI512: define{{.*}} <8 x i64> @test_v64u128(<8 x i64> noundef %{{.*}}, ptr noundef byval(<4 x i128>) align 64 %{{.*}})
 // CLANG9ABI512: define{{.*}} <8 x i64> @test_v64u128(<8 x i64> noundef %{{.*}}, <4 x i128> noundef %{{.*}})
   return a + (v64u64)b;



More information about the cfe-commits mailing list