[clang] [X86_64] fix SSE type error in vaarg. (PR #86377)

Longsheng Mou via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 24 18:48:54 PDT 2024


https://github.com/CoTinker updated https://github.com/llvm/llvm-project/pull/86377

>From b494f2e7f59668678f95d4a739d810f6fd6944aa Mon Sep 17 00:00:00 2001
From: Longsheng Mou <moulongsheng at huawei.com>
Date: Sat, 23 Mar 2024 11:52:04 +0800
Subject: [PATCH] [X86_64] fix SSE type error in vaarg.

tweak the position of the ++neededSSE when Lo is NoClass
and Hi is SSE.
---
 clang/lib/CodeGen/Targets/X86.cpp      |  3 +-
 clang/test/CodeGen/X86/x86_64-vaarg.c  | 69 ++++++++++++++++++++++++++
 clang/test/CodeGenCXX/x86_64-vaarg.cpp | 50 +++++++++++++++++--
 3 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/x86_64-vaarg.c

diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 1ec0f159ebcb8a..6931768147a9b2 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -2788,12 +2788,11 @@ X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
     // memory), except in situations involving unions.
   case X87Up:
   case SSE:
+    ++neededSSE;
     HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
 
     if (Lo == NoClass)  // Pass HighPart at offset 8 in memory.
       return ABIArgInfo::getDirect(HighPart, 8);
-
-    ++neededSSE;
     break;
 
     // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
diff --git a/clang/test/CodeGen/X86/x86_64-vaarg.c b/clang/test/CodeGen/X86/x86_64-vaarg.c
new file mode 100644
index 00000000000000..a18ba836423881
--- /dev/null
+++ b/clang/test/CodeGen/X86/x86_64-vaarg.c
@@ -0,0 +1,69 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+
+typedef struct { struct {} a; } empty;
+
+// CHECK-LABEL: define dso_local void @empty_record_test(
+// CHECK-SAME: i32 noundef [[Z:%.*]], ...) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1
+// CHECK-NEXT:    store i32 [[Z]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 0, i1 false)
+// CHECK-NEXT:    ret void
+//
+empty empty_record_test(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, empty);
+}
+
+typedef struct {
+  struct{} a;
+  double b;
+} s1;
+
+// CHECK-LABEL: define dso_local double @f(
+// CHECK-SAME: i32 noundef [[Z:%.*]], ...) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    store i32 [[Z]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
+// CHECK-NEXT:    [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160
+// CHECK-NEXT:    br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 8
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 8, i1 false)
+// CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    ret double [[TMP3]]
+//
+s1 f(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s1);
+}
diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
index f0177906a09a81..d221c1881d3672 100644
--- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp
+++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
@@ -1,10 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -x c -o - %s | FileCheck %s
 
 typedef struct { struct {} a; } empty;
 
-// CHECK-LABEL: @{{.*}}empty_record_test
+// CHECK-LABEL: @_Z17empty_record_testiz(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
 // CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
@@ -14,10 +13,55 @@ typedef struct { struct {} a; } empty;
 // CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
 // CHECK-NEXT:    call void @llvm.va_start(ptr [[ARRAYDECAY]])
 // CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 {{.*}}, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 1, i1 false)
 // CHECK-NEXT:    ret void
+//
 empty empty_record_test(int z, ...) {
   __builtin_va_list list;
   __builtin_va_start(list, z);
   return __builtin_va_arg(list, empty);
 }
+
+typedef struct {
+  struct{} a;
+  double b;
+} s1;
+
+// CHECK-LABEL: @_Z1fiz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
+// CHECK-NEXT:    [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160
+// CHECK-NEXT:    br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8
+// CHECK-NEXT:    ret double [[TMP4]]
+//
+s1 f(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s1);
+}



More information about the cfe-commits mailing list