[clang] b451ecd - [Clang][AArch64] Disable rounding of return values for AArch64

Tue May 4 10:29:35 PDT 2021

Author: Andrew Savonichev
Date: 2021-05-04T20:29:01+03:00
New Revision: b451ecd86e13ec6ef47caf37f62977645c4f748e

URL: https://github.com/llvm/llvm-project/commit/b451ecd86e13ec6ef47caf37f62977645c4f748e
DIFF: https://github.com/llvm/llvm-project/commit/b451ecd86e13ec6ef47caf37f62977645c4f748e.diff

LOG: [Clang][AArch64] Disable rounding of return values for AArch64

If a return value is explicitly rounded to 64 bits, an additional zext
instruction is emitted, and in some cases it prevents tail call
optimization.

As discussed in D100225, this rounding is not necessary and can be
disabled.

Differential Revision: https://reviews.llvm.org/D100591

Added: 
    

Modified: 
    clang/lib/CodeGen/TargetInfo.cpp
    clang/test/CodeGen/aarch64-varargs.c
    clang/test/CodeGen/arm64-arguments.c
    clang/test/CodeGen/arm64-microsoft-arguments.cpp
    clang/test/CodeGen/attr-noundef.cpp
    clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
    clang/test/CodeGenCXX/trivial_abi.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 9577b61ca6d0..633b963965ed 100644

--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -5781,6 +5781,18 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
     if (getTarget().isRenderScriptTarget()) {
       return coerceToIntArray(RetTy, getContext(), getVMContext());
     }
+
+    if (Size <= 64 && getDataLayout().isLittleEndian()) {
+      // Composite types are returned in lower bits of a 64-bit register for LE,
+      // and in higher bits for BE. However, integer types are always returned
+      // in lower bits for both LE and BE, and they are not rounded up to
+      // 64 bits. We can skip rounding up of composite types for LE, but not for
+      // BE, otherwise composite types will be indistinguishable from integer
+      // types.
+      return ABIArgInfo::getDirect(
+          llvm::IntegerType::get(getVMContext(), Size));
+    }
+
     unsigned Alignment = getContext().getTypeAlign(RetTy);
     Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
 

diff  --git a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c
index b71ec4af7aca..908fb4ae5d10 100644
--- a/clang/test/CodeGen/aarch64-varargs.c
+++ b/clang/test/CodeGen/aarch64-varargs.c
@@ -473,7 +473,8 @@ typedef struct __attribute__((packed,aligned(2))) {
   int val;
 } underaligned_int_struct;
 underaligned_int_struct underaligned_int_struct_test() {
-// CHECK-LABEL: define{{.*}} i64 @underaligned_int_struct_test()
+// CHECK-LE-LABEL: define{{.*}} i32 @underaligned_int_struct_test()
+// CHECK-BE-LABEL: define{{.*}} i64 @underaligned_int_struct_test()
   return va_arg(the_list, underaligned_int_struct);
 // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
@@ -675,7 +676,8 @@ typedef struct {
   int val __attribute__((packed,aligned(2)));
 } underaligned_int_struct_member;
 underaligned_int_struct_member underaligned_int_struct_member_test() {
-// CHECK-LABEL: define{{.*}} i64 @underaligned_int_struct_member_test()
+// CHECK-LE-LABEL: define{{.*}} i32 @underaligned_int_struct_member_test()
+// CHECK-BE-LABEL: define{{.*}} i64 @underaligned_int_struct_member_test()
   return va_arg(the_list, underaligned_int_struct_member);
 // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0

diff  --git a/clang/test/CodeGen/arm64-arguments.c b/clang/test/CodeGen/arm64-arguments.c
index a40e5365cc51..b362346aa8a8 100644
--- a/clang/test/CodeGen/arm64-arguments.c
+++ b/clang/test/CodeGen/arm64-arguments.c
@@ -1,33 +1,41 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+// RUN: %clang_cc1 -triple aarch64_be-none-linux-gnu -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 
 // CHECK: define{{.*}} signext i8 @f0()
 char f0(void) {
   return 0;
 }
 
-// Struct as return type. Aggregates <= 16 bytes are passed directly and round
-// up to multiple of 8 bytes.
-// CHECK: define{{.*}} i64 @f1()
+// Struct as return type. Aggregates <= 16 bytes are passed directly. For BE,
+// return values are rounded up to 64 bits.
+//
+// CHECK-LE: define{{.*}} i8 @f1()
+// CHECK-BE: define{{.*}} i64 @f1()
 struct s1 { char f0; };
 struct s1 f1(void) {}
 
-// CHECK: define{{.*}} i64 @f2()
+// CHECK-LE: define{{.*}} i16 @f2()
+// CHECK-BE: define{{.*}} i64 @f2()
 struct s2 { short f0; };
 struct s2 f2(void) {}
 
-// CHECK: define{{.*}} i64 @f3()
+// CHECK-LE: define{{.*}} i32 @f3()
+// CHECK-BE: define{{.*}} i64 @f3()
 struct s3 { int f0; };
 struct s3 f3(void) {}
 
-// CHECK: define{{.*}} i64 @f4()
+// CHECK-LE: define{{.*}} i32 @f4()
+// CHECK-BE: define{{.*}} i64 @f4()
 struct s4 { struct s4_0 { int f0; } f0; };
 struct s4 f4(void) {}
 
-// CHECK: define{{.*}} i64 @f5()
+// CHECK-LE: define{{.*}} i32 @f5()
+// CHECK-BE: define{{.*}} i64 @f5()
 struct s5 { struct { } f0; int f1; };
 struct s5 f5(void) {}
 
-// CHECK: define{{.*}} i64 @f6()
+// CHECK-LE: define{{.*}} i32 @f6()
+// CHECK-BE: define{{.*}} i64 @f6()
 struct s6 { int f0[1]; };
 struct s6 f6(void) {}
 
@@ -39,19 +47,33 @@ struct s7 f7(void) {}
 struct s8 { struct { int : 0; } f0[1]; };
 struct s8 f8(void) {}
 
-// CHECK: define{{.*}} i64 @f9()
+// CHECK-LE: define{{.*}} i32 @f9()
+// CHECK-BE: define{{.*}} i64 @f9()
 struct s9 { int f0; int : 0; };
 struct s9 f9(void) {}
 
-// CHECK: define{{.*}} i64 @f10()
+// CHECK-LE: define{{.*}} i32 @f10()
+// CHECK-BE: define{{.*}} i64 @f10()
 struct s10 { int f0; int : 0; int : 0; };
 struct s10 f10(void) {}
 
-// CHECK: define{{.*}} i64 @f11()
+// CHECK-LE: define{{.*}} i32 @f11()
+// CHECK-BE: define{{.*}} i64 @f11()
 struct s11 { int : 0; int f0; };
 struct s11 f11(void) {}
 
-// CHECK: define{{.*}} i64 @f12()
+// CHECK-LE: define{{.*}} i24 @f11_packed()
+// CHECK-BE: define{{.*}} i64 @f11_packed()
+struct s11_packed { char c; short s } __attribute__((packed));
+struct s11_packed f11_packed(void) { }
+
+// CHECK-LE: define{{.*}} i32 @f11_not_packed()
+// CHECK-BE: define{{.*}} i64 @f11_not_packed()
+struct s11_not_packed { char c; short s; };
+struct s11_not_packed f11_not_packed(void) { }
+
+// CHECK-LE: define{{.*}} i32 @f12()
+// CHECK-BE: define{{.*}} i64 @f12()
 union u12 { char f0; short f1; int f2; };
 union u12 f12(void) {}
 
@@ -69,28 +91,35 @@ void f15(struct s7 a0) {}
 // CHECK: define{{.*}} void @f16()
 void f16(struct s8 a0) {}
 
-// CHECK: define{{.*}} i64 @f17()
+// CHECK-LE: define{{.*}} i32 @f17()
+// CHECK-BE: define{{.*}} i64 @f17()
 struct s17 { short f0 : 13; char f1 : 4; };
 struct s17 f17(void) {}
 
-// CHECK: define{{.*}} i64 @f18()
+// CHECK-LE: define{{.*}} i32 @f18()
+// CHECK-BE: define{{.*}} i64 @f18()
 struct s18 { short f0; char f1 : 4; };
 struct s18 f18(void) {}
 
-// CHECK: define{{.*}} i64 @f19()
+// CHECK-LE: define{{.*}} i32 @f19()
+// CHECK-BE: define{{.*}} i64 @f19()
 struct s19 { int f0; struct s8 f1; };
 struct s19 f19(void) {}
 
-// CHECK: define{{.*}} i64 @f20()
+// CHECK-LE: define{{.*}} i32 @f20()
+// CHECK-BE: define{{.*}} i64 @f20()
 struct s20 { struct s8 f1; int f0; };
 struct s20 f20(void) {}
 
-// CHECK: define{{.*}} i64 @f21()
+// CHECK-LE: define{{.*}} i32 @f21()
+// CHECK-BE: define{{.*}} i64 @f21()
 struct s21 { struct {} f1; int f0 : 4; };
 struct s21 f21(void) {}
 
-// CHECK: define{{.*}} i64 @f22()
-// CHECK: define{{.*}} i64 @f23()
+// CHECK-LE: define{{.*}} i16 @f22()
+// CHECK-LE: define{{.*}} i32 @f23()
+// CHECK-BE: define{{.*}} i64 @f22()
+// CHECK-BE: define{{.*}} i64 @f23()
 // CHECK: define{{.*}} i64 @f24()
 // CHECK: define{{.*}} [2 x i64] @f25()
 // CHECK: define{{.*}} { float, float } @f26()
@@ -102,11 +131,13 @@ _Complex long long  f25(void) {}
 _Complex float      f26(void) {}
 _Complex double     f27(void) {}
 
-// CHECK: define{{.*}} i64 @f28()
+// CHECK-LE: define{{.*}} i16 @f28()
+// CHECK-BE: define{{.*}} i64 @f28()
 struct s28 { _Complex char f0; };
 struct s28 f28() {}
 
-// CHECK: define{{.*}} i64 @f29()
+// CHECK-LE: define{{.*}} i32 @f29()
+// CHECK-BE: define{{.*}} i64 @f29()
 struct s29 { _Complex short f0; };
 struct s29 f29() {}
 
@@ -118,7 +149,9 @@ struct s31 { char x; };
 void f31(struct s31 s) { }
 // CHECK: define{{.*}} void @f31(i64 %s.coerce)
 // CHECK: %s = alloca %struct.s31, align 1
-// CHECK: trunc i64 %s.coerce to i8
+// CHECK-BE: %coerce.highbits = lshr i64 %s.coerce, 56
+// CHECK-BE: trunc i64 %coerce.highbits to i8
+// CHECK-LE: trunc i64 %s.coerce to i8
 // CHECK: store i8 %{{.*}},
 
 struct s32 { double x; };
@@ -624,15 +657,15 @@ struct HFA {
 };
 
 float test_hfa(int n, ...) {
-// CHECK-LABEL: define{{.*}} float @test_hfa(i32 %n, ...)
-// CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
+// CHECK-LE-LABEL: define{{.*}} float @test_hfa(i32 %n, ...)
+// CHECK-LE: [[THELIST:%.*]] = alloca i8*
+// CHECK-LE: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // HFA is not indirect, so occupies its full 16 bytes on the stack.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 16
-// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
+// CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 16
+// CHECK-LE: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
-// CHECK: bitcast i8* [[CURLIST]] to %struct.HFA*
+// CHECK-LE: bitcast i8* [[CURLIST]] to %struct.HFA*
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
   struct HFA h = __builtin_va_arg(thelist, struct HFA);
@@ -650,17 +683,17 @@ struct TooBigHFA {
 };
 
 float test_toobig_hfa(int n, ...) {
-// CHECK-LABEL: define{{.*}} float @test_toobig_hfa(i32 %n, ...)
-// CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
+// CHECK-LE-LABEL: define{{.*}} float @test_toobig_hfa(i32 %n, ...)
+// CHECK-LE: [[THELIST:%.*]] = alloca i8*
+// CHECK-LE: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
   // of stack consumed.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
-// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
+// CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
+// CHECK-LE: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
-// CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHFA**
-// CHECK: [[HFAPTR:%.*]] = load %struct.TooBigHFA*, %struct.TooBigHFA** [[HFAPTRPTR]]
+// CHECK-LE: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHFA**
+// CHECK-LE: [[HFAPTR:%.*]] = load %struct.TooBigHFA*, %struct.TooBigHFA** [[HFAPTRPTR]]
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
   struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA);
@@ -672,21 +705,21 @@ struct HVA {
 };
 
 int32x4_t test_hva(int n, ...) {
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_hva(i32 %n, ...)
-// CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
+// CHECK-LE-LABEL: define{{.*}} <4 x i32> @test_hva(i32 %n, ...)
+// CHECK-LE: [[THELIST:%.*]] = alloca i8*
+// CHECK-LE: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // HVA is not indirect, so occupies its full 16 bytes on the stack. but it
   // must be properly aligned.
-// CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
-// CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
-// CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
-// CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
+// CHECK-LE: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
+// CHECK-LE: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
+// CHECK-LE: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
+// CHECK-LE: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
 
-// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 32
-// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
+// CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 32
+// CHECK-LE: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
-// CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
+// CHECK-LE: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
   struct HVA h = __builtin_va_arg(thelist, struct HVA);
@@ -698,17 +731,17 @@ struct TooBigHVA {
 };
 
 int32x4_t test_toobig_hva(int n, ...) {
-// CHECK-LABEL: define{{.*}} <4 x i32> @test_toobig_hva(i32 %n, ...)
-// CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
+// CHECK-LE-LABEL: define{{.*}} <4 x i32> @test_toobig_hva(i32 %n, ...)
+// CHECK-LE: [[THELIST:%.*]] = alloca i8*
+// CHECK-LE: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
   // of stack consumed.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
-// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
+// CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[CURLIST]], i64 8
+// CHECK-LE: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
-// CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA**
-// CHECK: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]]
+// CHECK-LE: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to %struct.TooBigHVA**
+// CHECK-LE: [[HVAPTR:%.*]] = load %struct.TooBigHVA*, %struct.TooBigHVA** [[HVAPTRPTR]]
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
   struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
@@ -719,21 +752,21 @@ typedef __attribute__((__ext_vector_type__(3))) float float32x3_t;
 typedef struct { float32x3_t arr[4]; } HFAv3;
 
 float32x3_t test_hva_v3(int n, ...) {
-// CHECK-LABEL: define{{.*}} <3 x float> @test_hva_v3(i32 %n, ...)
-// CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
+// CHECK-LE-LABEL: define{{.*}} <3 x float> @test_hva_v3(i32 %n, ...)
+// CHECK-LE: [[THELIST:%.*]] = alloca i8*
+// CHECK-LE: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // HVA is not indirect, so occupies its full 16 bytes on the stack. but it
   // must be properly aligned.
-// CHECK: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
-// CHECK: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
-// CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
-// CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
+// CHECK-LE: [[ALIGN0:%.*]] = ptrtoint i8* [[CURLIST]] to i64
+// CHECK-LE: [[ALIGN1:%.*]] = add i64 [[ALIGN0]], 15
+// CHECK-LE: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
+// CHECK-LE: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
 
-// CHECK: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64
-// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
+// CHECK-LE: [[NEXTLIST:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_LIST]], i64 64
+// CHECK-LE: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
-// CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3*
+// CHECK-LE: bitcast i8* [[ALIGNED_LIST]] to %struct.HFAv3*
   __builtin_va_list l;
   __builtin_va_start(l, n);
   HFAv3 r = __builtin_va_arg(l, HFAv3);

diff  --git a/clang/test/CodeGen/arm64-microsoft-arguments.cpp b/clang/test/CodeGen/arm64-microsoft-arguments.cpp
index 8a2fb2e01a62..d9160f8ded0c 100644
--- a/clang/test/CodeGen/arm64-microsoft-arguments.cpp
+++ b/clang/test/CodeGen/arm64-microsoft-arguments.cpp
@@ -104,8 +104,8 @@ S5 f5() {
 
 // Pass and return an object with a non-trivial explicitly defaulted constructor
 // (passed directly, returned directly)
-// CHECK: define {{.*}} i64 @"?f6@@YA?AUS6@@XZ"()
-// CHECK: call i64 {{.*}}func6{{.*}}(i64 {{.*}})
+// CHECK: define {{.*}} i8 @"?f6@@YA?AUS6@@XZ"()
+// CHECK: call i8 {{.*}}func6{{.*}}(i64 {{.*}})
 struct S6a {
   S6a();
 };
@@ -123,8 +123,8 @@ S6 f6() {
 
 // Pass and return an object with a non-trivial implicitly defaulted constructor
 // (passed directly, returned directly)
-// CHECK: define {{.*}} i64 @"?f7@@YA?AUS7@@XZ"()
-// CHECK: call i64 {{.*}}func7{{.*}}(i64 {{.*}})
+// CHECK: define {{.*}} i8 @"?f7@@YA?AUS7@@XZ"()
+// CHECK: call i8 {{.*}}func7{{.*}}(i64 {{.*}})
 struct S7 {
   S6a x;
 };

diff  --git a/clang/test/CodeGen/attr-noundef.cpp b/clang/test/CodeGen/attr-noundef.cpp
index 0f05795adf4b..949e05110418 100644
--- a/clang/test/CodeGen/attr-noundef.cpp
+++ b/clang/test/CodeGen/attr-noundef.cpp
@@ -11,7 +11,7 @@ struct Trivial {
 Trivial ret_trivial() { return {}; }
 void pass_trivial(Trivial e) {}
 // CHECK-INTEL: [[DEFINE:define( dso_local)?]] i32 @{{.*}}ret_trivial
-// CHECK-AARCH: [[DEFINE:define( dso_local)?]] i64 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE:define( dso_local)?]] i32 @{{.*}}ret_trivial
 // CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
 // CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
 
@@ -43,7 +43,7 @@ union Trivial {
 Trivial ret_trivial() { return {}; }
 void pass_trivial(Trivial e) {}
 // CHECK-INTEL: [[DEFINE]] i32 @{{.*}}ret_trivial
-// CHECK-AARCH: [[DEFINE]] i64 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE]] i32 @{{.*}}ret_trivial
 // CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
 // CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
 

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
index b36ea9ccd9f0..94d59eb8703f 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
@@ -87,7 +87,7 @@ Small small_return() { return Small(); }
 // LINUX-LABEL: define{{.*}} void @_Z12small_returnv(%struct.Small* noalias sret(%struct.Small) align 4 %agg.result)
 // WIN32: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
 // WIN64: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
-// WOA64: define dso_local i64 @"?small_return@@YA?AUSmall@@XZ"()
+// WOA64: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
 
 Medium medium_return() { return Medium(); }
 // LINUX-LABEL: define{{.*}} void @_Z13medium_returnv(%struct.Medium* noalias sret(%struct.Medium) align 4 %agg.result)

diff  --git a/clang/test/CodeGenCXX/trivial_abi.cpp b/clang/test/CodeGenCXX/trivial_abi.cpp
index a4222c100311..07efa5f7a363 100644
--- a/clang/test/CodeGenCXX/trivial_abi.cpp
+++ b/clang/test/CodeGenCXX/trivial_abi.cpp
@@ -198,12 +198,11 @@ void testIgnoredLarge() {
   testReturnLarge();
 }
 
-// CHECK: define{{.*}} i64 @_Z20testReturnHasTrivialv()
+// CHECK: define{{.*}} i32 @_Z20testReturnHasTrivialv()
 // CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_TRIVIAL:.*]], align 4
 // CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_TRIVIAL]], %[[STRUCT_TRIVIAL]]* %[[RETVAL]], i32 0, i32 0
 // CHECK: %[[V0:.*]] = load i32, i32* %[[COERCE_DIVE]], align 4
-// CHECK: %[[COERCE_VAL_II:.*]] = zext i32 %[[V0]] to i64
-// CHECK: ret i64 %[[COERCE_VAL_II]]
+// CHECK: ret i32 %[[V0]]
 // CHECK: }
 
 Trivial testReturnHasTrivial() {