[clang] a21abc7 - [X86] Align i128 to 16 bytes in x86 datalayouts
Harald van Dijk via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 11 02:24:02 PDT 2023
Author: Harald van Dijk
Date: 2023-10-11T10:23:38+01:00
New Revision: a21abc782a8e1cb718a10c471a3b634f3102fc1c
URL: https://github.com/llvm/llvm-project/commit/a21abc782a8e1cb718a10c471a3b634f3102fc1c
DIFF: https://github.com/llvm/llvm-project/commit/a21abc782a8e1cb718a10c471a3b634f3102fc1c.diff
LOG: [X86] Align i128 to 16 bytes in x86 datalayouts
This is an attempt at rebooting https://reviews.llvm.org/D28990
I've included AutoUpgrade changes to modify the data layout to satisfy the compatible layout check. But this does mean that allocas, loads, stores, etc. in old IR will automatically get this new alignment.
This should fix PR46320.
Reviewed By: echristo, rnk, tmgross
Differential Revision: https://reviews.llvm.org/D86310
Added:
llvm/test/Bitcode/upgrade-datalayout5.ll
Modified:
clang/lib/Basic/Targets/OSTargets.h
clang/lib/Basic/Targets/X86.h
clang/test/CodeGen/target-data.c
llvm/docs/ReleaseNotes.rst
llvm/lib/IR/AutoUpgrade.cpp
llvm/lib/Target/X86/X86TargetMachine.cpp
llvm/test/Bitcode/upgrade-datalayout.ll
llvm/test/Bitcode/upgrade-datalayout2.ll
llvm/test/Bitcode/upgrade-datalayout3.ll
llvm/test/Bitcode/upgrade-datalayout4.ll
llvm/test/CodeGen/X86/AMX/amx-config.ll
llvm/test/CodeGen/X86/arg-copy-elide.ll
llvm/test/CodeGen/X86/atomic-idempotent.ll
llvm/test/CodeGen/X86/atomic-non-integer.ll
llvm/test/CodeGen/X86/atomic-unordered.ll
llvm/test/CodeGen/X86/atomic-xor.ll
llvm/test/CodeGen/X86/atomic128.ll
llvm/test/CodeGen/X86/avx512fp16-cvt.ll
llvm/test/CodeGen/X86/bitcast-i256.ll
llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll
llvm/test/CodeGen/X86/fp-intrinsics.ll
llvm/test/CodeGen/X86/fp128-cast-strict.ll
llvm/test/CodeGen/X86/fp128-cast.ll
llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
llvm/test/CodeGen/X86/fp128-libcalls.ll
llvm/test/CodeGen/X86/fpenv-combine.ll
llvm/test/CodeGen/X86/fpenv.ll
llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
llvm/test/CodeGen/X86/implicit-null-check.ll
llvm/test/CodeGen/X86/osx-private-labels.ll
llvm/test/CodeGen/X86/scheduler-backtracking.ll
llvm/test/CodeGen/X86/sdiv_fix.ll
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/setcc-wide-types.ll
llvm/test/CodeGen/X86/smul-with-overflow.ll
llvm/test/CodeGen/X86/sret-implicit.ll
llvm/test/CodeGen/X86/statepoint-deopt-lowering.ll
llvm/test/CodeGen/X86/statepoint-vector.ll
llvm/test/CodeGen/X86/udiv_fix.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll
llvm/test/tools/llvm-lto2/X86/pipeline.ll
llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll
llvm/test/tools/llvm-lto2/X86/stats-file-option.ll
llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
llvm/unittests/CodeGen/InstrRefLDVTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index f2bd846e670d145..23799d8a4ae17bc 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -817,10 +817,10 @@ class LLVM_LIBRARY_VISIBILITY NaClTargetInfo : public OSTargetInfo<Target> {
// Handled in ARM's setABI().
} else if (Triple.getArch() == llvm::Triple::x86) {
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
- "i64:64-n8:16:32-S128");
+ "i64:64-i128:128-n8:16:32-S128");
} else if (Triple.getArch() == llvm::Triple::x86_64) {
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
- "i64:64-n8:16:32:64-S128");
+ "i64:64-i128:128-n8:16:32:64-S128");
} else if (Triple.getArch() == llvm::Triple::mipsel) {
// Handled on mips' setDataLayout.
} else {
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index b759c76fc95ca0c..4fdc94de1e0cb4d 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -431,13 +431,12 @@ class LLVM_LIBRARY_VISIBILITY X86_32TargetInfo : public X86TargetInfo {
LongDoubleWidth = 96;
LongDoubleAlign = 32;
SuitableAlign = 128;
- resetDataLayout(
- Triple.isOSBinFormatMachO()
- ? "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-"
- "f80:32-n8:16:32-S128"
- : "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-"
- "f80:32-n8:16:32-S128",
- Triple.isOSBinFormatMachO() ? "_" : "");
+ resetDataLayout(Triple.isOSBinFormatMachO()
+ ? "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:"
+ "128-f64:32:64-f80:32-n8:16:32-S128"
+ : "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:"
+ "128-f64:32:64-f80:32-n8:16:32-S128",
+ Triple.isOSBinFormatMachO() ? "_" : "");
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
@@ -542,8 +541,9 @@ class LLVM_LIBRARY_VISIBILITY DarwinI386TargetInfo
UseSignedCharForObjCBool = false;
SizeType = UnsignedLong;
IntPtrType = SignedLong;
- resetDataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-"
- "f80:128-n8:16:32-S128", "_");
+ resetDataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-"
+ "f64:32:64-f80:128-n8:16:32-S128",
+ "_");
HasAlignMac68kSupport = true;
}
@@ -570,7 +570,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_32TargetInfo
getTriple().isOSWindows() && getTriple().isOSBinFormatCOFF();
bool IsMSVC = getTriple().isWindowsMSVCEnvironment();
std::string Layout = IsWinCOFF ? "e-m:x" : "e-m:e";
- Layout += "-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-";
+ Layout += "-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-";
Layout += IsMSVC ? "f80:128" : "f80:32";
Layout += "-n8:16:32-a:0:32-S32";
resetDataLayout(Layout, IsWinCOFF ? "_" : "");
@@ -621,8 +621,8 @@ class LLVM_LIBRARY_VISIBILITY CygwinX86_32TargetInfo : public X86_32TargetInfo {
: X86_32TargetInfo(Triple, Opts) {
this->WCharType = TargetInfo::UnsignedShort;
DoubleAlign = LongLongAlign = 64;
- resetDataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:"
- "32-n8:16:32-a:0:32-S32",
+ resetDataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-"
+ "i128:128-f80:32-n8:16:32-a:0:32-S32",
"_");
}
@@ -660,8 +660,8 @@ class LLVM_LIBRARY_VISIBILITY MCUX86_32TargetInfo : public X86_32TargetInfo {
: X86_32TargetInfo(Triple, Opts) {
LongDoubleWidth = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
- resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:32-f64:"
- "32-f128:32-n8:16:32-a:0:32-S32");
+ resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:32-"
+ "f64:32-f128:32-n8:16:32-a:0:32-S32");
WIntType = UnsignedInt;
}
@@ -721,11 +721,11 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
// Pointers are 32-bit in x32.
resetDataLayout(IsX32 ? "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
- "i64:64-f80:128-n8:16:32:64-S128"
- : IsWinCOFF ? "e-m:w-p270:32:32-p271:32:32-p272:64:"
- "64-i64:64-f80:128-n8:16:32:64-S128"
- : "e-m:e-p270:32:32-p271:32:32-p272:64:"
- "64-i64:64-f80:128-n8:16:32:64-S128");
+ "i64:64-i128:128-f80:128-n8:16:32:64-S128"
+ : IsWinCOFF ? "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:"
+ "64-i128:128-f80:128-n8:16:32:64-S128"
+ : "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:"
+ "64-i128:128-f80:128-n8:16:32:64-S128");
// Use fpret only for long double.
RealTypeUsesObjCFPRetMask = (unsigned)FloatModeKind::LongDouble;
@@ -922,8 +922,9 @@ class LLVM_LIBRARY_VISIBILITY DarwinX86_64TargetInfo
llvm::Triple T = llvm::Triple(Triple);
if (T.isiOS())
UseSignedCharForObjCBool = false;
- resetDataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:"
- "16:32:64-S128", "_");
+ resetDataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-"
+ "f80:128-n8:16:32:64-S128",
+ "_");
}
bool handleTargetFeatures(std::vector<std::string> &Features,
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 3649ddd651ffc3c..c5cb922576dd461 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -1,26 +1,26 @@
// RUN: %clang_cc1 -triple i686-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix=I686-UNKNOWN %s
-// I686-UNKNOWN: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
+// I686-UNKNOWN: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
// RUN: %clang_cc1 -triple i686-apple-darwin9 -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix=I686-DARWIN %s
-// I686-DARWIN: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:128-n8:16:32-S128"
+// I686-DARWIN: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:128-n8:16:32-S128"
// RUN: %clang_cc1 -triple i686-unknown-win32 -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix=I686-WIN32 %s
-// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
+// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32"
// RUN: %clang_cc1 -triple i686-unknown-cygwin -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix=I686-CYGWIN %s
-// I686-CYGWIN: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
+// I686-CYGWIN: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32"
// RUN: %clang_cc1 -triple i686-pc-macho -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix=I686-MACHO %s
-// I686-MACHO: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
+// I686-MACHO: target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix=X86_64 %s
-// X86_64: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+// X86_64: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
// RUN: %clang_cc1 -triple xcore-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix=XCORE %s
@@ -92,11 +92,11 @@
// RUN: %clang_cc1 -triple i686-nacl -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=I686-NACL
-// I686-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-n8:16:32-S128"
+// I686-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n8:16:32-S128"
// RUN: %clang_cc1 -triple x86_64-nacl -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=X86_64-NACL
-// X86_64-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-n8:16:32:64-S128"
+// X86_64-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n8:16:32:64-S128"
// RUN: %clang_cc1 -triple arm-nacl -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=ARM-NACL
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index f0d4b5c5dfc7aff..3453c7e61ae4a63 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -129,6 +129,11 @@ Changes to the Windows Target
Changes to the X86 Backend
--------------------------
+* The ``i128`` type now matches GCC and clang's ``__int128`` type. This mainly
+ benefits external projects such as Rust which aim to be binary compatible
+ with C, but also fixes code generation where LLVM already assumed that the
+ type matched and called into libgcc helper functions.
+
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 304edc2c3a2c374..e102aae52597a8a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -5201,13 +5201,29 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
// If the datalayout matches the expected format, add pointer size address
// spaces to the datalayout.
std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
- if (!DL.contains(AddrSpaces)) {
+ if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
SmallVector<StringRef, 4> Groups;
Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
- if (R.match(DL, &Groups))
+ if (R.match(Res, &Groups))
Res = (Groups[1] + AddrSpaces + Groups[3]).str();
}
+ // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
+ // for i128 operations prior to this being reflected in the data layout, and
+ // clang mostly produced LLVM IR that already aligned i128 to 16 byte
+ // boundaries, so although this is a breaking change, the upgrade is expected
+ // to fix more IR than it breaks.
+ // Intel MCU is an exception and uses 4-byte-alignment.
+ if (!T.isOSIAMCU()) {
+ std::string I128 = "-i128:128";
+ if (StringRef Ref = Res; !Ref.contains(I128)) {
+ SmallVector<StringRef, 4> Groups;
+ Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
+ if (R.match(Res, &Groups))
+ Res = (Groups[1] + I128 + Groups[3]).str();
+ }
+ }
+
// For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
// Raising the alignment is safe because Clang did not produce f80 values in
// the MSVC environment before this upgrade was added.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index c0d3b8aa93e6cec..82c15c916c51fc6 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -130,12 +130,14 @@ static std::string computeDataLayout(const Triple &TT) {
Ret += "-p270:32:32-p271:32:32-p272:64:64";
// Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
+ // 128 bit integers are not specified in the 32-bit ABIs but are used
+ // internally for lowering f128, so we match the alignment to that.
if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
- Ret += "-i64:64";
+ Ret += "-i64:64-i128:128";
else if (TT.isOSIAMCU())
Ret += "-i64:32-f64:32";
else
- Ret += "-f64:32:64";
+ Ret += "-i128:128-f64:32:64";
// Some ABIs align long double to 128 bits, others to 32.
if (TT.isOSNaCl() || TT.isOSIAMCU())
diff --git a/llvm/test/Bitcode/upgrade-datalayout.ll b/llvm/test/Bitcode/upgrade-datalayout.ll
index 8c00294f2ba5b17..dc2c459cd19bd95 100644
--- a/llvm/test/Bitcode/upgrade-datalayout.ll
+++ b/llvm/test/Bitcode/upgrade-datalayout.ll
@@ -5,5 +5,5 @@
target datalayout = "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/Bitcode/upgrade-datalayout2.ll b/llvm/test/Bitcode/upgrade-datalayout2.ll
index 21de5b8a67d1f44..b516c08fecf77bd 100644
--- a/llvm/test/Bitcode/upgrade-datalayout2.ll
+++ b/llvm/test/Bitcode/upgrade-datalayout2.ll
@@ -2,6 +2,12 @@
; match a possible x86 datalayout.
;
; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s
+;
+; XFAIL: *
+; No implementation of the data layout upgrade ever checked whether the data
+; layout was a possible x86 data layout, so the logic that this test aims to
+; check was never implemented. We always upgraded data layouts that were not
+; possible x86 data layouts, we merely did not previously upgrade this one.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Bitcode/upgrade-datalayout3.ll b/llvm/test/Bitcode/upgrade-datalayout3.ll
index 6d95f2407acf40a..f59097b57b56ffd 100644
--- a/llvm/test/Bitcode/upgrade-datalayout3.ll
+++ b/llvm/test/Bitcode/upgrade-datalayout3.ll
@@ -5,4 +5,4 @@
target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
target triple = "i686-pc-windows-msvc"
-; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-S32"
+; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-S32"
diff --git a/llvm/test/Bitcode/upgrade-datalayout4.ll b/llvm/test/Bitcode/upgrade-datalayout4.ll
index ee0e5fe3bf6fa4d..026247bcce43da5 100644
--- a/llvm/test/Bitcode/upgrade-datalayout4.ll
+++ b/llvm/test/Bitcode/upgrade-datalayout4.ll
@@ -5,4 +5,4 @@
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc"
-; CHECK: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
+; CHECK: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32"
diff --git a/llvm/test/Bitcode/upgrade-datalayout5.ll b/llvm/test/Bitcode/upgrade-datalayout5.ll
new file mode 100644
index 000000000000000..2b1d5d1467cf814
--- /dev/null
+++ b/llvm/test/Bitcode/upgrade-datalayout5.ll
@@ -0,0 +1,8 @@
+; Test to make sure datalayout is automatically upgraded.
+;
+; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
diff --git a/llvm/test/CodeGen/X86/AMX/amx-config.ll b/llvm/test/CodeGen/X86/AMX/amx-config.ll
index 74282a0afee76b6..275c85389c4db1c 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-config.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-config.ll
@@ -79,10 +79,10 @@ define <4 x i32> @test_api(i32 %0, i16 signext %1, i16 signext %2, <4 x i32> %xm
; AVX1-LABEL: test_api:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: vmovups %xmm1, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movb $1, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll
index 9d57c9cb2b423e6..0eb2c630e681898 100644
--- a/llvm/test/CodeGen/X86/arg-copy-elide.ll
+++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll
@@ -186,8 +186,8 @@ define void @split_i128(ptr %sret, i128 %x) {
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: andl $-8, %esp
-; CHECK-NEXT: subl $32, %esp
+; CHECK-NEXT: andl $-16, %esp
+; CHECK-NEXT: subl $48, %esp
; CHECK-NEXT: movl 12(%ebp), %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl 16(%ebp), %ebx
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index ef9ed7936366864..3a9648bd1fbb502 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -182,12 +182,10 @@ define i128 @or128(ptr %p) {
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: andl $-8, %esp
-; X86-SSE2-NEXT: subl $16, %esp
-; X86-SSE2-NEXT: .cfi_offset %esi, -16
-; X86-SSE2-NEXT: .cfi_offset %edi, -12
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $32, %esp
+; X86-SSE2-NEXT: .cfi_offset %esi, -12
; X86-SSE2-NEXT: movl 8(%ebp), %esi
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
@@ -198,18 +196,11 @@ define i128 @or128(ptr %p) {
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
; X86-SSE2-NEXT: addl $20, %esp
-; X86-SSE2-NEXT: movl (%esp), %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE2-NEXT: movl %edi, 8(%esi)
-; X86-SSE2-NEXT: movl %edx, 12(%esi)
-; X86-SSE2-NEXT: movl %eax, (%esi)
-; X86-SSE2-NEXT: movl %ecx, 4(%esi)
+; X86-SSE2-NEXT: movaps (%esp), %xmm0
+; X86-SSE2-NEXT: movaps %xmm0, (%esi)
; X86-SSE2-NEXT: movl %esi, %eax
-; X86-SSE2-NEXT: leal -8(%ebp), %esp
+; X86-SSE2-NEXT: leal -4(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
-; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT: retl $4
@@ -223,7 +214,7 @@ define i128 @or128(ptr %p) {
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
; X86-SLM-NEXT: pushl %edi
; X86-SLM-NEXT: pushl %esi
-; X86-SLM-NEXT: andl $-8, %esp
+; X86-SLM-NEXT: andl $-16, %esp
; X86-SLM-NEXT: subl $16, %esp
; X86-SLM-NEXT: .cfi_offset %esi, -16
; X86-SLM-NEXT: .cfi_offset %edi, -12
@@ -263,7 +254,7 @@ define i128 @or128(ptr %p) {
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
; X86-ATOM-NEXT: pushl %edi
; X86-ATOM-NEXT: pushl %esi
-; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: andl $-16, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: .cfi_offset %esi, -16
; X86-ATOM-NEXT: .cfi_offset %edi, -12
@@ -528,8 +519,8 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X86-SSE2-NEXT: .cfi_offset %ebp, -8
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: andl $-8, %esp
-; X86-SSE2-NEXT: subl $16, %esp
+; X86-SSE2-NEXT: andl $-16, %esp
+; X86-SSE2-NEXT: subl $32, %esp
; X86-SSE2-NEXT: movl %esp, %eax
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $0
@@ -551,8 +542,8 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X86-SLM-NEXT: .cfi_offset %ebp, -8
; X86-SLM-NEXT: movl %esp, %ebp
; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
-; X86-SLM-NEXT: andl $-8, %esp
-; X86-SLM-NEXT: subl $16, %esp
+; X86-SLM-NEXT: andl $-16, %esp
+; X86-SLM-NEXT: subl $32, %esp
; X86-SLM-NEXT: movl 8(%ebp), %eax
; X86-SLM-NEXT: movl %esp, %ecx
; X86-SLM-NEXT: pushl $0
@@ -575,7 +566,7 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
; X86-ATOM-NEXT: leal (%esp), %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
-; X86-ATOM-NEXT: andl $-8, %esp
+; X86-ATOM-NEXT: andl $-16, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
; X86-ATOM-NEXT: movl 8(%ebp), %eax
; X86-ATOM-NEXT: movl %esp, %ecx
diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll
index 84f944f2bd909e4..7d2810e57a25b5e 100644
--- a/llvm/test/CodeGen/X86/atomic-non-integer.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll
@@ -157,8 +157,8 @@ define void @store_fp128(ptr %fptr, fp128 %v) {
;
; X86-AVX-LABEL: store_fp128:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: subl $44, %esp
-; X86-AVX-NEXT: .cfi_def_cfa_offset 48
+; X86-AVX-NEXT: subl $60, %esp
+; X86-AVX-NEXT: .cfi_def_cfa_offset 64
; X86-AVX-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl %eax, {{[0-9]+}}(%esp)
@@ -166,7 +166,7 @@ define void @store_fp128(ptr %fptr, fp128 %v) {
; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl %eax, (%esp)
; X86-AVX-NEXT: calll __sync_lock_test_and_set_16
-; X86-AVX-NEXT: addl $40, %esp
+; X86-AVX-NEXT: addl $56, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
;
@@ -394,67 +394,111 @@ define double @load_double(ptr %fptr) {
}
define fp128 @load_fp128(ptr %fptr) {
-; X86-SSE-LABEL: load_fp128:
-; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %edi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE-NEXT: subl $20, %esp
-; X86-SSE-NEXT: .cfi_def_cfa_offset 32
-; X86-SSE-NEXT: .cfi_offset %esi, -12
-; X86-SSE-NEXT: .cfi_offset %edi, -8
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT: subl $8, %esp
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 8
-; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl $0
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: pushl %eax
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset 4
-; X86-SSE-NEXT: calll __sync_val_compare_and_swap_16
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset -4
-; X86-SSE-NEXT: addl $44, %esp
-; X86-SSE-NEXT: .cfi_adjust_cfa_offset -44
-; X86-SSE-NEXT: movl (%esp), %eax
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE-NEXT: movl %edi, 8(%esi)
-; X86-SSE-NEXT: movl %edx, 12(%esi)
-; X86-SSE-NEXT: movl %eax, (%esi)
-; X86-SSE-NEXT: movl %ecx, 4(%esi)
-; X86-SSE-NEXT: movl %esi, %eax
-; X86-SSE-NEXT: addl $20, %esp
-; X86-SSE-NEXT: .cfi_def_cfa_offset 12
-; X86-SSE-NEXT: popl %esi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE-NEXT: popl %edi
-; X86-SSE-NEXT: .cfi_def_cfa_offset 4
-; X86-SSE-NEXT: retl $4
+; X86-SSE1-LABEL: load_fp128:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl %edi
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE1-NEXT: pushl %esi
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X86-SSE1-NEXT: subl $20, %esp
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 32
+; X86-SSE1-NEXT: .cfi_offset %esi, -12
+; X86-SSE1-NEXT: .cfi_offset %edi, -8
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SSE1-NEXT: subl $8, %esp
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 8
+; X86-SSE1-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl $0
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: pushl %eax
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE1-NEXT: calll __sync_val_compare_and_swap_16
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset -4
+; X86-SSE1-NEXT: addl $44, %esp
+; X86-SSE1-NEXT: .cfi_adjust_cfa_offset -44
+; X86-SSE1-NEXT: movl (%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SSE1-NEXT: movl %edi, 8(%esi)
+; X86-SSE1-NEXT: movl %edx, 12(%esi)
+; X86-SSE1-NEXT: movl %eax, (%esi)
+; X86-SSE1-NEXT: movl %ecx, 4(%esi)
+; X86-SSE1-NEXT: movl %esi, %eax
+; X86-SSE1-NEXT: addl $20, %esp
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
+; X86-SSE1-NEXT: popl %esi
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE1-NEXT: popl %edi
+; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE1-NEXT: retl $4
+;
+; X86-SSE2-LABEL: load_fp128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: subl $24, %esp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 32
+; X86-SSE2-NEXT: .cfi_offset %esi, -8
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 8
+; X86-SSE2-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl $0
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset 4
+; X86-SSE2-NEXT: calll __sync_val_compare_and_swap_16
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset -4
+; X86-SSE2-NEXT: addl $44, %esp
+; X86-SSE2-NEXT: .cfi_adjust_cfa_offset -44
+; X86-SSE2-NEXT: movaps (%esp), %xmm0
+; X86-SSE2-NEXT: movaps %xmm0, (%esi)
+; X86-SSE2-NEXT: movl %esi, %eax
+; X86-SSE2-NEXT: addl $24, %esp
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE2-NEXT: retl $4
;
; X86-AVX-LABEL: load_fp128:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
-; X86-AVX-NEXT: subl $56, %esp
-; X86-AVX-NEXT: .cfi_def_cfa_offset 64
+; X86-AVX-NEXT: subl $72, %esp
+; X86-AVX-NEXT: .cfi_def_cfa_offset 80
; X86-AVX-NEXT: .cfi_offset %esi, -8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -466,10 +510,10 @@ define fp128 @load_fp128(ptr %fptr) {
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: calll __sync_val_compare_and_swap_16
; X86-AVX-NEXT: subl $4, %esp
-; X86-AVX-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-AVX-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT: vmovaps %xmm0, (%esi)
; X86-AVX-NEXT: movl %esi, %eax
-; X86-AVX-NEXT: addl $56, %esp
+; X86-AVX-NEXT: addl $72, %esp
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index 9482d71ebff0334..b66988c8bd24b51 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -322,12 +322,12 @@ define i256 @load_i256(ptr %ptr) {
; CHECK-O0-NEXT: .cfi_def_cfa_offset 64
; CHECK-O0-NEXT: movq %rdi, %rax
; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill
; CHECK-O0-NEXT: movl $32, %edi
; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: callq __atomic_load at PLT
-; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx
@@ -373,12 +373,12 @@ define void @store_i256(ptr %ptr, i256 %v) {
; CHECK-O0-NEXT: .cfi_def_cfa_offset 48
; CHECK-O0-NEXT: movq %rsi, %rax
; CHECK-O0-NEXT: movq %rdi, %rsi
-; CHECK-O0-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movq %rax, (%rsp)
; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movl $32, %edi
-; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CHECK-O0-NEXT: movq %rsp, %rdx
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: callq __atomic_store at PLT
; CHECK-O0-NEXT: addq $40, %rsp
@@ -393,8 +393,8 @@ define void @store_i256(ptr %ptr, i256 %v) {
; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-O3-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
-; CHECK-O3-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
+; CHECK-O3-NEXT: movq %rsi, (%rsp)
+; CHECK-O3-NEXT: movq %rsp, %rdx
; CHECK-O3-NEXT: movl $32, %edi
; CHECK-O3-NEXT: movq %rax, %rsi
; CHECK-O3-NEXT: xorl %ecx, %ecx
diff --git a/llvm/test/CodeGen/X86/atomic-xor.ll b/llvm/test/CodeGen/X86/atomic-xor.ll
index 5f33cb8cd98ab43..97fa908f1b7148d 100644
--- a/llvm/test/CodeGen/X86/atomic-xor.ll
+++ b/llvm/test/CodeGen/X86/atomic-xor.ll
@@ -24,7 +24,7 @@ define i128 @xor128_signbit_used(ptr %p) nounwind {
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
+; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %esi
; X86-NEXT: movl %esp, %eax
diff --git a/llvm/test/CodeGen/X86/atomic128.ll b/llvm/test/CodeGen/X86/atomic128.ll
index 5e4b885c399b59f..d5600b54a169d2f 100644
--- a/llvm/test/CodeGen/X86/atomic128.ll
+++ b/llvm/test/CodeGen/X86/atomic128.ll
@@ -169,7 +169,7 @@ define void @fetch_and_nand(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -237,7 +237,7 @@ define void @fetch_and_or(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -305,7 +305,7 @@ define void @fetch_and_add(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -373,7 +373,7 @@ define void @fetch_and_sub(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -444,7 +444,7 @@ define void @fetch_and_min(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -515,7 +515,7 @@ define void @fetch_and_max(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -586,7 +586,7 @@ define void @fetch_and_umin(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -657,7 +657,7 @@ define void @fetch_and_umax(ptr %p, i128 %bits) {
; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
; CHECK32-NEXT: addl $28, %esp
; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT: movl (%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
index c3a979f9840bd75..e1e013528738a4d 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll
@@ -804,8 +804,8 @@ define i128 @half_to_s128(half %x) {
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $32, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $48, %esp
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: movl 8(%ebp), %esi
; X86-NEXT: vmovsh 12(%ebp), %xmm0
@@ -814,8 +814,8 @@ define i128 @half_to_s128(half %x) {
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixhfti
; X86-NEXT: subl $4, %esp
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: vmovups %xmm0, (%esi)
+; X86-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
@@ -907,8 +907,8 @@ define i128 @half_to_u128(half %x) {
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $32, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $48, %esp
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: movl 8(%ebp), %esi
; X86-NEXT: vmovsh 12(%ebp), %xmm0
@@ -917,8 +917,8 @@ define i128 @half_to_u128(half %x) {
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __fixunshfti
; X86-NEXT: subl $4, %esp
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: vmovups %xmm0, (%esi)
+; X86-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -4(%ebp), %esp
; X86-NEXT: popl %esi
@@ -987,8 +987,8 @@ define fp128 @half_to_f128(half %x) nounwind {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $32, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $48, %esp
; X86-NEXT: movl 8(%ebp), %esi
; X86-NEXT: vmovsh 12(%ebp), %xmm0
; X86-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
@@ -997,7 +997,7 @@ define fp128 @half_to_f128(half %x) nounwind {
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __extendsftf2
; X86-NEXT: subl $4, %esp
-; X86-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: vmovaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: leal -4(%ebp), %esp
diff --git a/llvm/test/CodeGen/X86/bitcast-i256.ll b/llvm/test/CodeGen/X86/bitcast-i256.ll
index 0a1953bcbe19efa..57fc26600cb7f1d 100644
--- a/llvm/test/CodeGen/X86/bitcast-i256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-i256.ll
@@ -14,7 +14,7 @@ define i256 @foo(<8 x i32> %a) {
; SLOW: # %bb.0:
; SLOW-NEXT: movq %rdi, %rax
; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi)
-; SLOW-NEXT: vmovups %xmm0, (%rdi)
+; SLOW-NEXT: vmovaps %xmm0, (%rdi)
; SLOW-NEXT: vzeroupper
; SLOW-NEXT: retq
%r = bitcast <8 x i32> %a to i256
diff --git a/llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll b/llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll
index 88530cc0e1d46c1..6b63162717e7f1c 100644
--- a/llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll
@@ -62,4 +62,4 @@ catch.switch:
; CHECK-LABEL: $handlerMap$0$test2:
; CHECK: .long 0
; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long 16
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index f4689b2ab6bb944..32e45adcb94d71f 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1145,9 +1145,8 @@ define i128 @f20s128(double %x) nounwind strictfp {
;
; X86-SSE-LABEL: f20s128:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: subl $36, %esp
+; X86-SSE-NEXT: subl $40, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
@@ -1155,18 +1154,11 @@ define i128 @f20s128(double %x) nounwind strictfp {
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixdfti
; X86-SSE-NEXT: subl $4, %esp
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE-NEXT: movl %edi, 8(%esi)
-; X86-SSE-NEXT: movl %edx, 12(%esi)
-; X86-SSE-NEXT: movl %eax, (%esi)
-; X86-SSE-NEXT: movl %ecx, 4(%esi)
+; X86-SSE-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
+; X86-SSE-NEXT: movaps %xmm0, (%esi)
; X86-SSE-NEXT: movl %esi, %eax
-; X86-SSE-NEXT: addl $36, %esp
+; X86-SSE-NEXT: addl $40, %esp
; X86-SSE-NEXT: popl %esi
-; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: retl $4
;
; SSE-LABEL: f20s128:
@@ -1490,9 +1482,8 @@ define i128 @f20u128(double %x) nounwind strictfp {
;
; X86-SSE-LABEL: f20u128:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: subl $36, %esp
+; X86-SSE-NEXT: subl $40, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
@@ -1500,18 +1491,11 @@ define i128 @f20u128(double %x) nounwind strictfp {
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixunsdfti
; X86-SSE-NEXT: subl $4, %esp
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE-NEXT: movl %edi, 8(%esi)
-; X86-SSE-NEXT: movl %edx, 12(%esi)
-; X86-SSE-NEXT: movl %eax, (%esi)
-; X86-SSE-NEXT: movl %ecx, 4(%esi)
+; X86-SSE-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
+; X86-SSE-NEXT: movaps %xmm0, (%esi)
; X86-SSE-NEXT: movl %esi, %eax
-; X86-SSE-NEXT: addl $36, %esp
+; X86-SSE-NEXT: addl $40, %esp
; X86-SSE-NEXT: popl %esi
-; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: retl $4
;
; SSE-LABEL: f20u128:
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
index e116697adc63364..f141153d059acb1 100644
--- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -37,7 +37,7 @@ define dso_local void @TestFPExtF16_F128() nounwind strictfp {
; X86-LABEL: TestFPExtF16_F128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: movzwl vf16, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
@@ -55,7 +55,7 @@ define dso_local void @TestFPExtF16_F128() nounwind strictfp {
; X86-NEXT: movl %edx, vf128+8
; X86-NEXT: movl %ecx, vf128+4
; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $40, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
@@ -87,7 +87,7 @@ define dso_local void @TestFPExtF32_F128() nounwind strictfp {
; X86-LABEL: TestFPExtF32_F128:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: flds vf32
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: wait
@@ -103,7 +103,7 @@ define dso_local void @TestFPExtF32_F128() nounwind strictfp {
; X86-NEXT: movl %edx, vf128+8
; X86-NEXT: movl %ecx, vf128+4
; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: addl $40, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index 5bb44436ab60000..b39533c0ee21c29 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -34,7 +34,7 @@ define dso_local void @TestFPExtF32_F128() nounwind {
; X32-LABEL: TestFPExtF32_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $24, %esp
+; X32-NEXT: subl $40, %esp
; X32-NEXT: flds vf32
; X32-NEXT: fstps {{[0-9]+}}(%esp)
; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -49,7 +49,7 @@ define dso_local void @TestFPExtF32_F128() nounwind {
; X32-NEXT: movl %edx, vf128+8
; X32-NEXT: movl %ecx, vf128+4
; X32-NEXT: movl %eax, vf128
-; X32-NEXT: addl $24, %esp
+; X32-NEXT: addl $40, %esp
; X32-NEXT: popl %esi
; X32-NEXT: retl
;
@@ -424,7 +424,7 @@ define dso_local void @TestFPToSIF128_I128() nounwind {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __fixtfti
; X32-NEXT: addl $28, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -475,7 +475,7 @@ define dso_local void @TestFPToUIF128_U128() nounwind {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __fixunstfti
; X32-NEXT: addl $28, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -638,7 +638,7 @@ define dso_local void @TestSIToFPI16_F128() nounwind {
; X32-NEXT: pushl %ecx
; X32-NEXT: calll __floatsitf
; X32-NEXT: addl $12, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -686,7 +686,7 @@ define dso_local void @TestSIToFPU16_F128() nounwind {
; X32-NEXT: pushl %ecx
; X32-NEXT: calll __floatunsitf
; X32-NEXT: addl $12, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -732,7 +732,7 @@ define dso_local void @TestSIToFPI32_F128() nounwind {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __floatsitf
; X32-NEXT: addl $12, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -778,7 +778,7 @@ define dso_local void @TestUIToFPU32_F128() #2 {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __floatunsitf
; X32-NEXT: addl $12, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -825,7 +825,7 @@ define dso_local void @TestSIToFPI64_F128() nounwind {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __floatditf
; X32-NEXT: addl $12, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -872,7 +872,7 @@ define dso_local void @TestUIToFPU64_F128() #2 {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __floatunditf
; X32-NEXT: addl $12, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -922,7 +922,7 @@ define dso_local void @TestSIToFPI128_F128() nounwind {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __floattitf
; X32-NEXT: addl $28, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -973,7 +973,7 @@ define dso_local void @TestUIToFPU128_F128() #2 {
; X32-NEXT: pushl %eax
; X32-NEXT: calll __floatuntitf
; X32-NEXT: addl $28, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -1303,7 +1303,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
; X32-NEXT: addl $12, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl (%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: .LBB26_4: # %cleanup
; X32-NEXT: movl %edx, (%esi)
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index caea147ec81cd73..4722ce62f6e56fa 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -21,9 +21,8 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: add:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -38,18 +37,11 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __addtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%add = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -66,9 +58,8 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: sub:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -83,18 +74,11 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __subtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%sub = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -111,9 +95,8 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: mul:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -128,18 +111,11 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __multf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%mul = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -156,9 +132,8 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: div:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -173,18 +148,11 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%div = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -201,9 +169,8 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
;
; X86-LABEL: fma:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -222,18 +189,11 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmal
; X86-NEXT: addl $60, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%fma = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -250,9 +210,8 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: frem:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -267,18 +226,11 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmodl
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%div = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -295,9 +247,8 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
;
; X86-LABEL: ceil:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -308,18 +259,11 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll ceill
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%ceil = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -336,9 +280,8 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
;
; X86-LABEL: cos:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -349,18 +292,11 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll cosl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%cos = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -377,9 +313,8 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
;
; X86-LABEL: exp:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -390,18 +325,11 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll expl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%exp = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -418,9 +346,8 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
;
; X86-LABEL: exp2:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -431,18 +358,11 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll exp2l
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%exp2 = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -459,9 +379,8 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
;
; X86-LABEL: floor:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -472,18 +391,11 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll floorl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%floor = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -500,9 +412,8 @@ define fp128 @log(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -513,18 +424,11 @@ define fp128 @log(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll logl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%log = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -541,9 +445,8 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log10:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -554,18 +457,11 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll log10l
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%log10 = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -582,9 +478,8 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
;
; X86-LABEL: log2:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -595,18 +490,11 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll log2l
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%log2 = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -623,9 +511,8 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: maxnum:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -640,18 +527,11 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmaxl
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%maxnum = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
@@ -668,9 +548,8 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: minnum:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -685,18 +564,11 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll fminl
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%minnum = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
@@ -713,9 +585,8 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
;
; X86-LABEL: nearbyint:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -726,18 +597,11 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll nearbyintl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%nearbyint = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -754,9 +618,8 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
;
; X86-LABEL: pow:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -771,18 +634,11 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll powl
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%pow = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -799,9 +655,8 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
;
; X86-LABEL: powi:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -813,18 +668,11 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __powitf2
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%powi = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -841,9 +689,8 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
;
; X86-LABEL: rint:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -854,18 +701,11 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll rintl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%rint = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -882,9 +722,8 @@ define fp128 @round(fp128 %x) nounwind strictfp {
;
; X86-LABEL: round:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -895,18 +734,11 @@ define fp128 @round(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll roundl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%round = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -923,9 +755,8 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
;
; X86-LABEL: roundeven:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -936,18 +767,11 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll roundevenl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%roundeven = call fp128 @llvm.experimental.constrained.roundeven.f128(fp128 %x, metadata !"fpexcept.strict") #0
@@ -964,9 +788,8 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sin:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -977,18 +800,11 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll sinl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%sin = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -1005,9 +821,8 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
;
; X86-LABEL: sqrt:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -1018,18 +833,11 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll sqrtl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
@@ -1046,9 +854,8 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
;
; X86-LABEL: trunc:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -1059,18 +866,11 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
; X86-NEXT: pushl %eax
; X86-NEXT: calll truncl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%trunc = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"fpexcept.strict") #0
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index 72cb5472400a264..4e7e6b4d01944f1 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -22,8 +22,7 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Add:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -36,16 +35,9 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __addtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%add = fadd fp128 %d1, %d2
@@ -66,8 +58,7 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Add:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -80,16 +71,9 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __addtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+8
-; X86-NEXT: movl %edx, vf128+12
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%0 = load fp128, ptr @vf128, align 16
@@ -109,8 +93,7 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Sub:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -123,16 +106,9 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __subtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sub = fsub fp128 %d1, %d2
@@ -153,8 +129,7 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Sub:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -167,16 +142,9 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __subtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+8
-; X86-NEXT: movl %edx, vf128+12
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%0 = load fp128, ptr @vf128, align 16
@@ -196,8 +164,7 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Mul:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -210,16 +177,9 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __multf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%mul = fmul fp128 %d1, %d2
@@ -240,8 +200,7 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Mul:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -254,16 +213,9 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __multf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+8
-; X86-NEXT: movl %edx, vf128+12
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%0 = load fp128, ptr @vf128, align 16
@@ -283,8 +235,7 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Div:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -297,16 +248,9 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%div = fdiv fp128 %d1, %d2
@@ -327,8 +271,7 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Div:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -341,16 +284,9 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divtf3
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+8
-; X86-NEXT: movl %edx, vf128+12
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%0 = load fp128, ptr @vf128, align 16
@@ -370,8 +306,7 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
;
; X86-LABEL: Test128Rem:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -384,16 +319,9 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmodl
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%div = frem fp128 %d1, %d2
@@ -414,8 +342,7 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
;
; X86-LABEL: Test128_1Rem:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -428,16 +355,9 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmodl
; X86-NEXT: addl $44, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+8
-; X86-NEXT: movl %edx, vf128+12
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%0 = load fp128, ptr @vf128, align 16
@@ -457,8 +377,7 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Sqrt:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -467,16 +386,9 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll sqrtl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.sqrt.f128(fp128 %d1)
@@ -496,8 +408,7 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Sin:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -506,16 +417,9 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll sinl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.sin.f128(fp128 %d1)
@@ -535,8 +439,7 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Cos:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -545,16 +448,9 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll cosl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.cos.f128(fp128 %d1)
@@ -574,8 +470,7 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Ceil:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -584,16 +479,9 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll ceill
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.ceil.f128(fp128 %d1)
@@ -613,8 +501,7 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Floor:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -623,16 +510,9 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll floorl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.floor.f128(fp128 %d1)
@@ -652,8 +532,7 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Trunc:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -662,16 +541,9 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll truncl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.trunc.f128(fp128 %d1)
@@ -691,8 +563,7 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Nearbyint:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -701,16 +572,9 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll nearbyintl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.nearbyint.f128(fp128 %d1)
@@ -730,8 +594,7 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Rint:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -740,16 +603,9 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll rintl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.rint.f128(fp128 %d1)
@@ -769,8 +625,7 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
;
; X86-LABEL: Test128Round:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $36, %esp
+; X86-NEXT: subl $40, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
@@ -779,16 +634,9 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll roundl
; X86-NEXT: addl $28, %esp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, vf128+12
-; X86-NEXT: movl %edx, vf128+8
-; X86-NEXT: movl %ecx, vf128+4
-; X86-NEXT: movl %eax, vf128
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, vf128
+; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%sqrt = call fp128 @llvm.round.f128(fp128 %d1)
@@ -804,9 +652,8 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
;
; X86-LABEL: Test128FMA:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
+; X86-NEXT: subl $24, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -825,18 +672,11 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmal
; X86-NEXT: addl $60, %esp
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, 12(%esi)
-; X86-NEXT: movl %edx, 8(%esi)
-; X86-NEXT: movl %ecx, 4(%esi)
-; X86-NEXT: movl %eax, (%esi)
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $20, %esp
+; X86-NEXT: addl $24, %esp
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%call = call fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
diff --git a/llvm/test/CodeGen/X86/fpenv-combine.ll b/llvm/test/CodeGen/X86/fpenv-combine.ll
index 568369a061cebb8..d1be4a5f44cf26f 100644
--- a/llvm/test/CodeGen/X86/fpenv-combine.ll
+++ b/llvm/test/CodeGen/X86/fpenv-combine.ll
@@ -15,19 +15,19 @@ define void @get_fpenv_02(ptr %ptr1, ptr %ptr2) #0 {
; X64-NEXT: subq $40, %rsp
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movq %rdi, %r14
-; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq fegetenv at PLT
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT: movq (%rsp), %rcx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT: movq %rsi, 24(%r14)
+; X64-NEXT: movq %rsi, 16(%r14)
; X64-NEXT: movq %rcx, (%r14)
+; X64-NEXT: movq %rax, 24(%r14)
; X64-NEXT: movq %rdx, 8(%r14)
-; X64-NEXT: movq %rax, 16(%r14)
-; X64-NEXT: movq %rax, 16(%rbx)
-; X64-NEXT: movq %rsi, 24(%rbx)
+; X64-NEXT: movq %rsi, 16(%rbx)
; X64-NEXT: movq %rcx, (%rbx)
+; X64-NEXT: movq %rax, 24(%rbx)
; X64-NEXT: movq %rdx, 8(%rbx)
; X64-NEXT: addq $40, %rsp
; X64-NEXT: popq %rbx
@@ -72,9 +72,9 @@ define void @get_fpenv_04(ptr %ptr) #0 {
; X64-NEXT: movq (%rsp), %rax
; X64-NEXT: andl $1, %eax
; X64-NEXT: movq %rax, (%rbx)
-; X64-NEXT: movq $0, 16(%rbx)
; X64-NEXT: movq $0, 24(%rbx)
; X64-NEXT: movq $0, 8(%rbx)
+; X64-NEXT: movq $0, 16(%rbx)
; X64-NEXT: addq $32, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: retq
@@ -94,9 +94,9 @@ define void @get_fpenv_05(ptr %ptr1, ptr %ptr2) #0 {
; X64-NEXT: subq $40, %rsp
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movq %rdi, %r14
-; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq fegetenv at PLT
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: movq (%rsp), %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
@@ -129,8 +129,8 @@ define void @set_fpenv_02(ptr %ptr1, ptr %ptr2) #0 {
; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: movq %rax, (%rsp)
+; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq fesetenv at PLT
; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
@@ -182,11 +182,11 @@ define void @set_fpenv_04(ptr %ptr) #0 {
; X64-NEXT: subq $40, %rsp
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: andl $1, %eax
-; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq %rax, (%rsp)
; X64-NEXT: movq $0, {{[0-9]+}}(%rsp)
; X64-NEXT: movq $0, {{[0-9]+}}(%rsp)
; X64-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq fesetenv at PLT
; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
index 1f01af92ca9261f..c79e19f07cda547 100644
--- a/llvm/test/CodeGen/X86/fpenv.ll
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -252,20 +252,20 @@ define void @func_05(i32 %x) nounwind {
define void @get_fpenv_01(ptr %ptr) #0 {
; X86-NOSSE-LABEL: get_fpenv_01:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: subl $44, %esp
+; X86-NOSSE-NEXT: subl $60, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: calll fegetenv
-; X86-NOSSE-NEXT: addl $44, %esp
+; X86-NOSSE-NEXT: addl $60, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: get_fpenv_01:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $44, %esp
+; X86-SSE-NEXT: subl $60, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll fegetenv
-; X86-SSE-NEXT: addl $44, %esp
+; X86-SSE-NEXT: addl $60, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: get_fpenv_01:
@@ -283,21 +283,21 @@ entry:
define void @get_fpenv_01_native(ptr %ptr) nounwind {
; X86-NOSSE-LABEL: get_fpenv_01_native:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: subl $36, %esp
+; X86-NOSSE-NEXT: subl $44, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: fnstenv (%eax)
; X86-NOSSE-NEXT: fldenv (%eax)
-; X86-NOSSE-NEXT: addl $36, %esp
+; X86-NOSSE-NEXT: addl $44, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: get_fpenv_01_native:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $36, %esp
+; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: fnstenv (%eax)
; X86-SSE-NEXT: fldenv (%eax)
; X86-SSE-NEXT: stmxcsr 28(%eax)
-; X86-SSE-NEXT: addl $36, %esp
+; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: get_fpenv_01_native:
@@ -315,20 +315,20 @@ entry:
define void @set_fpenv_01(ptr %ptr) #0 {
; X86-NOSSE-LABEL: set_fpenv_01:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: subl $44, %esp
+; X86-NOSSE-NEXT: subl $60, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: calll fesetenv
-; X86-NOSSE-NEXT: addl $44, %esp
+; X86-NOSSE-NEXT: addl $60, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: set_fpenv_01:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $44, %esp
+; X86-SSE-NEXT: subl $60, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll fesetenv
-; X86-SSE-NEXT: addl $44, %esp
+; X86-SSE-NEXT: addl $60, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: set_fpenv_01:
@@ -346,19 +346,19 @@ entry:
define void @set_fpenv_01_native(ptr %ptr) nounwind {
; X86-NOSSE-LABEL: set_fpenv_01_native:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: subl $36, %esp
+; X86-NOSSE-NEXT: subl $44, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: fldenv (%eax)
-; X86-NOSSE-NEXT: addl $36, %esp
+; X86-NOSSE-NEXT: addl $44, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: set_fpenv_01_native:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: subl $36, %esp
+; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: fldenv (%eax)
; X86-SSE-NEXT: ldmxcsr 28(%eax)
-; X86-SSE-NEXT: addl $36, %esp
+; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: set_fpenv_01_native:
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 76e15ca0bf919b7..2eb351c8fac9e46 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -690,7 +690,7 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
-; X86-X87-NEXT: subl $44, %esp
+; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fsts {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -766,7 +766,7 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-X87-NEXT: andl $15, %edx
; X86-X87-NEXT: movb %dl, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
-; X86-X87-NEXT: addl $44, %esp
+; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
@@ -779,7 +779,7 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: subl $28, %esp
+; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
@@ -819,7 +819,7 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-SSE-NEXT: andl $15, %eax
; X86-SSE-NEXT: movb %al, 12(%esi)
; X86-SSE-NEXT: movl %esi, %eax
-; X86-SSE-NEXT: addl $28, %esp
+; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
@@ -859,7 +859,7 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
-; X86-X87-NEXT: subl $44, %esp
+; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fsts {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -931,7 +931,7 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
-; X86-X87-NEXT: addl $44, %esp
+; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
@@ -954,7 +954,7 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: subl $28, %esp
+; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
@@ -990,7 +990,7 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %eax, (%esi)
; X86-SSE-NEXT: movl %esi, %eax
-; X86-SSE-NEXT: addl $28, %esp
+; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
@@ -2882,7 +2882,7 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
-; X86-X87-NEXT: subl $44, %esp
+; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
@@ -2960,7 +2960,7 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; X86-X87-NEXT: andl $15, %edx
; X86-X87-NEXT: movb %dl, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
-; X86-X87-NEXT: addl $44, %esp
+; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
@@ -3061,7 +3061,7 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
-; X86-X87-NEXT: subl $44, %esp
+; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
@@ -3135,7 +3135,7 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
-; X86-X87-NEXT: addl $44, %esp
+; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
index 01426b1ac91c240..e334af71397ff4f 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
@@ -807,7 +807,7 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
-; X86-X87-NEXT: subl $44, %esp
+; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fsts {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -864,7 +864,7 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
-; X86-X87-NEXT: addl $44, %esp
+; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
@@ -2818,7 +2818,7 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
-; X86-X87-NEXT: subl $44, %esp
+; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
@@ -2877,7 +2877,7 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
-; X86-X87-NEXT: addl $44, %esp
+; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/implicit-null-check.ll b/llvm/test/CodeGen/X86/implicit-null-check.ll
index b02f66fe8cfb1ca..fc81f703f5d4076 100644
--- a/llvm/test/CodeGen/X86/implicit-null-check.ll
+++ b/llvm/test/CodeGen/X86/implicit-null-check.ll
@@ -128,19 +128,15 @@ define i256 @imp_null_check_load_i256(ptr %x) {
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: Ltmp3:
-; CHECK-NEXT: movq (%rsi), %rcx ## on-fault: LBB5_1
+; CHECK-NEXT: movaps (%rsi), %xmm0 ## on-fault: LBB5_1
; CHECK-NEXT: ## %bb.2: ## %not_null
-; CHECK-NEXT: movq 8(%rsi), %rdx
-; CHECK-NEXT: movq 16(%rsi), %rdi
-; CHECK-NEXT: movq 24(%rsi), %rsi
-; CHECK-NEXT: movq %rsi, 24(%rax)
-; CHECK-NEXT: movq %rdi, 16(%rax)
-; CHECK-NEXT: movq %rdx, 8(%rax)
-; CHECK-NEXT: movq %rcx, (%rax)
+; CHECK-NEXT: movaps 16(%rsi), %xmm1
+; CHECK-NEXT: movaps %xmm1, 16(%rax)
+; CHECK-NEXT: movaps %xmm0, (%rax)
; CHECK-NEXT: retq
; CHECK-NEXT: LBB5_1: ## %is_null
-; CHECK-NEXT: movq $0, 24(%rax)
-; CHECK-NEXT: movq $0, 16(%rax)
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movaps %xmm0, 16(%rax)
; CHECK-NEXT: movq $0, 8(%rax)
; CHECK-NEXT: movq $42, (%rax)
; CHECK-NEXT: retq
@@ -622,7 +618,8 @@ define i64 @imp_null_check_load_shift_by_3_addr(ptr %x) {
define i64 @imp_null_check_load_shift_add_addr(ptr %x) {
; CHECK-LABEL: imp_null_check_load_shift_add_addr:
; CHECK: ## %bb.0: ## %entry
-; CHECK: movq 3526(,%rdi,8), %rax ## on-fault: LBB23_1
+; CHECK-NEXT: Ltmp19:
+; CHECK-NEXT: movq 3526(,%rdi,8), %rax ## on-fault: LBB23_1
; CHECK-NEXT: ## %bb.2: ## %not_null
; CHECK-NEXT: retq
; CHECK-NEXT: LBB23_1: ## %is_null
diff --git a/llvm/test/CodeGen/X86/osx-private-labels.ll b/llvm/test/CodeGen/X86/osx-private-labels.ll
index 7fed2ad23eb9445..f3343ccb98f53c6 100644
--- a/llvm/test/CodeGen/X86/osx-private-labels.ll
+++ b/llvm/test/CodeGen/X86/osx-private-labels.ll
@@ -36,7 +36,7 @@
@private6 = private unnamed_addr constant i128 42
; CHECK: .section __TEXT,__literal16,16byte_literals
-; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: .p2align 4
; CHECK-NEXT: L_private6:
%struct._objc_class = type { ptr }
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 0b3ef70d2beefd2..a9f3e8b22fb69ed 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -218,6 +218,8 @@ define i256 @test2(i256 %a) nounwind {
; ILP-LABEL: test2:
; ILP: # %bb.0:
; ILP-NEXT: movq %rdi, %rax
+; ILP-NEXT: xorps %xmm0, %xmm0
+; ILP-NEXT: movaps %xmm0, 16(%rdi)
; ILP-NEXT: xorl %edi, %edi
; ILP-NEXT: movq %rsi, %r11
; ILP-NEXT: negq %r11
@@ -250,14 +252,14 @@ define i256 @test2(i256 %a) nounwind {
; ILP-NEXT: orq %rdi, %r9
; ILP-NEXT: cmovneq %rcx, %r8
; ILP-NEXT: movq %r8, (%rax)
-; ILP-NEXT: movq $0, 24(%rax)
-; ILP-NEXT: movq $0, 16(%rax)
; ILP-NEXT: movq $0, 8(%rax)
; ILP-NEXT: retq
;
; HYBRID-LABEL: test2:
; HYBRID: # %bb.0:
; HYBRID-NEXT: movq %rdi, %rax
+; HYBRID-NEXT: xorps %xmm0, %xmm0
+; HYBRID-NEXT: movaps %xmm0, 16(%rdi)
; HYBRID-NEXT: xorl %edi, %edi
; HYBRID-NEXT: movq %rsi, %r11
; HYBRID-NEXT: negq %r11
@@ -290,14 +292,14 @@ define i256 @test2(i256 %a) nounwind {
; HYBRID-NEXT: orq %rdi, %r9
; HYBRID-NEXT: cmovneq %rcx, %r8
; HYBRID-NEXT: movq %r8, (%rax)
-; HYBRID-NEXT: movq $0, 24(%rax)
-; HYBRID-NEXT: movq $0, 16(%rax)
; HYBRID-NEXT: movq $0, 8(%rax)
; HYBRID-NEXT: retq
;
; BURR-LABEL: test2:
; BURR: # %bb.0:
; BURR-NEXT: movq %rdi, %rax
+; BURR-NEXT: xorps %xmm0, %xmm0
+; BURR-NEXT: movaps %xmm0, 16(%rdi)
; BURR-NEXT: xorl %edi, %edi
; BURR-NEXT: movq %rsi, %r11
; BURR-NEXT: negq %r11
@@ -330,8 +332,6 @@ define i256 @test2(i256 %a) nounwind {
; BURR-NEXT: orq %rdi, %r9
; BURR-NEXT: cmovneq %rcx, %r8
; BURR-NEXT: movq %r8, (%rax)
-; BURR-NEXT: movq $0, 24(%rax)
-; BURR-NEXT: movq $0, 16(%rax)
; BURR-NEXT: movq $0, 8(%rax)
; BURR-NEXT: retq
;
@@ -369,15 +369,17 @@ define i256 @test2(i256 %a) nounwind {
; SRC-NEXT: subq $-128, %r8
; SRC-NEXT: orq %r9, %rdi
; SRC-NEXT: cmovneq %rdx, %r8
+; SRC-NEXT: xorps %xmm0, %xmm0
+; SRC-NEXT: movaps %xmm0, 16(%rax)
; SRC-NEXT: movq %r8, (%rax)
-; SRC-NEXT: movq $0, 24(%rax)
-; SRC-NEXT: movq $0, 16(%rax)
; SRC-NEXT: movq $0, 8(%rax)
; SRC-NEXT: retq
;
; LIN-LABEL: test2:
; LIN: # %bb.0:
; LIN-NEXT: movq %rdi, %rax
+; LIN-NEXT: xorps %xmm0, %xmm0
+; LIN-NEXT: movaps %xmm0, 16(%rdi)
; LIN-NEXT: movq %rsi, %rdi
; LIN-NEXT: negq %rdi
; LIN-NEXT: andq %rsi, %rdi
@@ -411,8 +413,6 @@ define i256 @test2(i256 %a) nounwind {
; LIN-NEXT: cmoveq %rdx, %r8
; LIN-NEXT: movq %r8, (%rax)
; LIN-NEXT: movq $0, 8(%rax)
-; LIN-NEXT: movq $0, 16(%rax)
-; LIN-NEXT: movq $0, 24(%rax)
; LIN-NEXT: retq
%b = sub i256 0, %a
%c = and i256 %b, %a
@@ -425,6 +425,8 @@ define i256 @test3(i256 %n) nounwind {
; ILP: # %bb.0:
; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rdi, %rax
+; ILP-NEXT: xorps %xmm0, %xmm0
+; ILP-NEXT: movaps %xmm0, 16(%rdi)
; ILP-NEXT: xorl %r9d, %r9d
; ILP-NEXT: movq %rsi, %rdi
; ILP-NEXT: negq %rdi
@@ -439,13 +441,13 @@ define i256 @test3(i256 %n) nounwind {
; ILP-NEXT: notq %rdx
; ILP-NEXT: andq %r10, %rdx
; ILP-NEXT: bsrq %rdx, %r9
-; ILP-NEXT: notq %rsi
; ILP-NEXT: xorq $63, %rbx
; ILP-NEXT: notq %rcx
; ILP-NEXT: andq %r11, %rcx
; ILP-NEXT: bsrq %rcx, %r10
; ILP-NEXT: xorq $63, %r10
; ILP-NEXT: addq $64, %r10
+; ILP-NEXT: notq %rsi
; ILP-NEXT: testq %r8, %r8
; ILP-NEXT: cmovneq %rbx, %r10
; ILP-NEXT: xorq $63, %r9
@@ -461,8 +463,6 @@ define i256 @test3(i256 %n) nounwind {
; ILP-NEXT: orq %r8, %rcx
; ILP-NEXT: cmovneq %r10, %rsi
; ILP-NEXT: movq %rsi, (%rax)
-; ILP-NEXT: movq $0, 24(%rax)
-; ILP-NEXT: movq $0, 16(%rax)
; ILP-NEXT: movq $0, 8(%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: retq
@@ -471,6 +471,8 @@ define i256 @test3(i256 %n) nounwind {
; HYBRID: # %bb.0:
; HYBRID-NEXT: pushq %rbx
; HYBRID-NEXT: movq %rdi, %rax
+; HYBRID-NEXT: xorps %xmm0, %xmm0
+; HYBRID-NEXT: movaps %xmm0, 16(%rdi)
; HYBRID-NEXT: xorl %r9d, %r9d
; HYBRID-NEXT: movq %rsi, %rdi
; HYBRID-NEXT: negq %rdi
@@ -507,8 +509,6 @@ define i256 @test3(i256 %n) nounwind {
; HYBRID-NEXT: orq %r8, %rcx
; HYBRID-NEXT: cmovneq %r9, %rsi
; HYBRID-NEXT: movq %rsi, (%rax)
-; HYBRID-NEXT: movq $0, 24(%rax)
-; HYBRID-NEXT: movq $0, 16(%rax)
; HYBRID-NEXT: movq $0, 8(%rax)
; HYBRID-NEXT: popq %rbx
; HYBRID-NEXT: retq
@@ -517,6 +517,8 @@ define i256 @test3(i256 %n) nounwind {
; BURR: # %bb.0:
; BURR-NEXT: pushq %rbx
; BURR-NEXT: movq %rdi, %rax
+; BURR-NEXT: xorps %xmm0, %xmm0
+; BURR-NEXT: movaps %xmm0, 16(%rdi)
; BURR-NEXT: xorl %r9d, %r9d
; BURR-NEXT: movq %rsi, %rdi
; BURR-NEXT: negq %rdi
@@ -553,8 +555,6 @@ define i256 @test3(i256 %n) nounwind {
; BURR-NEXT: orq %r8, %rcx
; BURR-NEXT: cmovneq %r9, %rsi
; BURR-NEXT: movq %rsi, (%rax)
-; BURR-NEXT: movq $0, 24(%rax)
-; BURR-NEXT: movq $0, 16(%rax)
; BURR-NEXT: movq $0, 8(%rax)
; BURR-NEXT: popq %rbx
; BURR-NEXT: retq
@@ -597,15 +597,17 @@ define i256 @test3(i256 %n) nounwind {
; SRC-NEXT: subq $-128, %r10
; SRC-NEXT: orq %rcx, %r8
; SRC-NEXT: cmovneq %r9, %r10
+; SRC-NEXT: xorps %xmm0, %xmm0
+; SRC-NEXT: movaps %xmm0, 16(%rax)
; SRC-NEXT: movq %r10, (%rax)
-; SRC-NEXT: movq $0, 24(%rax)
-; SRC-NEXT: movq $0, 16(%rax)
; SRC-NEXT: movq $0, 8(%rax)
; SRC-NEXT: retq
;
; LIN-LABEL: test3:
; LIN: # %bb.0:
; LIN-NEXT: movq %rdi, %rax
+; LIN-NEXT: xorps %xmm0, %xmm0
+; LIN-NEXT: movaps %xmm0, 16(%rdi)
; LIN-NEXT: movq %rsi, %rdi
; LIN-NEXT: negq %rdi
; LIN-NEXT: notq %rsi
@@ -643,8 +645,6 @@ define i256 @test3(i256 %n) nounwind {
; LIN-NEXT: cmoveq %rsi, %rdi
; LIN-NEXT: movq %rdi, (%rax)
; LIN-NEXT: movq $0, 8(%rax)
-; LIN-NEXT: movq $0, 16(%rax)
-; LIN-NEXT: movq $0, 24(%rax)
; LIN-NEXT: retq
%m = sub i256 -1, %n
%x = sub i256 0, %n
diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll
index 336aa216d19b114..ce0b212aa4c26cd 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix.ll
@@ -306,8 +306,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $72, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %edx
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
index 371484e01556c82..31297a06f809935 100644
--- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -369,8 +369,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $88, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $96, %esp
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 20(%ebp), %esi
@@ -805,8 +805,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $192, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $208, %esp
; X86-NEXT: movl 36(%ebp), %esi
; X86-NEXT: movl 16(%ebp), %ebx
; X86-NEXT: movl 32(%ebp), %eax
diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll
index 61254d5e5c2f48a..5aa266db6553d3f 100644
--- a/llvm/test/CodeGen/X86/setcc-wide-types.ll
+++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll
@@ -774,13 +774,11 @@ define i1 @ne_v4i256(<4 x i256> %a0) {
define i32 @ne_i128_pair(ptr %a, ptr %b) {
; SSE2-LABEL: ne_i128_pair:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqu (%rdi), %xmm0
-; SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; SSE2-NEXT: movdqu (%rsi), %xmm2
-; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: movdqa 16(%rdi), %xmm1
+; SSE2-NEXT: pcmpeqb 16(%rsi), %xmm1
+; SSE2-NEXT: pcmpeqb (%rsi), %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %ecx
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
@@ -789,13 +787,11 @@ define i32 @ne_i128_pair(ptr %a, ptr %b) {
;
; SSE41-LABEL: ne_i128_pair:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqu (%rdi), %xmm0
-; SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; SSE41-NEXT: movdqu (%rsi), %xmm2
-; SSE41-NEXT: pxor %xmm0, %xmm2
-; SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: por %xmm2, %xmm0
+; SSE41-NEXT: movdqa (%rdi), %xmm0
+; SSE41-NEXT: movdqa 16(%rdi), %xmm1
+; SSE41-NEXT: pxor 16(%rsi), %xmm1
+; SSE41-NEXT: pxor (%rsi), %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: setne %al
@@ -803,8 +799,8 @@ define i32 @ne_i128_pair(ptr %a, ptr %b) {
;
; AVXANY-LABEL: ne_i128_pair:
; AVXANY: # %bb.0:
-; AVXANY-NEXT: vmovdqu (%rdi), %xmm0
-; AVXANY-NEXT: vmovdqu 16(%rdi), %xmm1
+; AVXANY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXANY-NEXT: vmovdqa 16(%rdi), %xmm1
; AVXANY-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
; AVXANY-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVXANY-NEXT: vpor %xmm1, %xmm0, %xmm0
@@ -832,13 +828,11 @@ define i32 @ne_i128_pair(ptr %a, ptr %b) {
define i32 @eq_i128_pair(ptr %a, ptr %b) {
; SSE2-LABEL: eq_i128_pair:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqu (%rdi), %xmm0
-; SSE2-NEXT: movdqu 16(%rdi), %xmm1
-; SSE2-NEXT: movdqu (%rsi), %xmm2
-; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: movdqu 16(%rsi), %xmm0
-; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: movdqa 16(%rdi), %xmm1
+; SSE2-NEXT: pcmpeqb 16(%rsi), %xmm1
+; SSE2-NEXT: pcmpeqb (%rsi), %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %ecx
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
@@ -847,13 +841,11 @@ define i32 @eq_i128_pair(ptr %a, ptr %b) {
;
; SSE41-LABEL: eq_i128_pair:
; SSE41: # %bb.0:
-; SSE41-NEXT: movdqu (%rdi), %xmm0
-; SSE41-NEXT: movdqu 16(%rdi), %xmm1
-; SSE41-NEXT: movdqu (%rsi), %xmm2
-; SSE41-NEXT: pxor %xmm0, %xmm2
-; SSE41-NEXT: movdqu 16(%rsi), %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: por %xmm2, %xmm0
+; SSE41-NEXT: movdqa (%rdi), %xmm0
+; SSE41-NEXT: movdqa 16(%rdi), %xmm1
+; SSE41-NEXT: pxor 16(%rsi), %xmm1
+; SSE41-NEXT: pxor (%rsi), %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: sete %al
@@ -861,8 +853,8 @@ define i32 @eq_i128_pair(ptr %a, ptr %b) {
;
; AVXANY-LABEL: eq_i128_pair:
; AVXANY: # %bb.0:
-; AVXANY-NEXT: vmovdqu (%rdi), %xmm0
-; AVXANY-NEXT: vmovdqu 16(%rdi), %xmm1
+; AVXANY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXANY-NEXT: vmovdqa 16(%rdi), %xmm1
; AVXANY-NEXT: vpxor 16(%rsi), %xmm1, %xmm1
; AVXANY-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVXANY-NEXT: vpor %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll
index 0e17af441d649b8..da0e3fdc1a5272a 100644
--- a/llvm/test/CodeGen/X86/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll
@@ -804,7 +804,7 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movb %cl, 16(%eax)
-; X86-NEXT: setne 20(%eax)
+; X86-NEXT: setne 32(%eax)
; X86-NEXT: addl $188, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -990,7 +990,7 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; X64-NEXT: movq %rcx, (%rax)
; X64-NEXT: movb %sil, 16(%rax)
-; X64-NEXT: setne 24(%rax)
+; X64-NEXT: setne 32(%rax)
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
diff --git a/llvm/test/CodeGen/X86/sret-implicit.ll b/llvm/test/CodeGen/X86/sret-implicit.ll
index 8cd2e63263dd2fa..6cd64812f24a662 100644
--- a/llvm/test/CodeGen/X86/sret-implicit.ll
+++ b/llvm/test/CodeGen/X86/sret-implicit.ll
@@ -25,7 +25,8 @@ define i256 @sret_demoted() {
; X64-LABEL: sret_demoted
; X64-DAG: movq %rdi, %rax
-; X64-DAG: movq $0, (%rdi)
+; X64-DAG: xorps %xmm0, %xmm0
+; X64-DAG: movaps %xmm0, (%rdi)
; X64: retq
; X86-LABEL: sret_demoted
diff --git a/llvm/test/CodeGen/X86/statepoint-deopt-lowering.ll b/llvm/test/CodeGen/X86/statepoint-deopt-lowering.ll
index cd882450c1af34a..72f4fa37dc5e5bf 100644
--- a/llvm/test/CodeGen/X86/statepoint-deopt-lowering.ll
+++ b/llvm/test/CodeGen/X86/statepoint-deopt-lowering.ll
@@ -43,35 +43,25 @@ define void @test_illegal_constants() gc "statepoint-example" {
; CHECK: # %bb.0:
; CHECK-NEXT: subq $248, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 256
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $144, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $144, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $144, (%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $144, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $144, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq foo at PLT
; CHECK-NEXT: .Ltmp2:
@@ -89,60 +79,36 @@ define void @test_illegal_values(i128 %v1, i256 %v2, i512 %v3, i1024 %v4) gc "st
; CHECK: # %bb.0:
; CHECK-NEXT: subq $248, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 256
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm4
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm5
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm6
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm7
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
; CHECK-NEXT: movq %r9, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rdi, (%rsp)
+; CHECK-NEXT: movaps %xmm11, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm10, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm9, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm8, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm5, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm4, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm3, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq foo at PLT
; CHECK-NEXT: .Ltmp3:
; CHECK-NEXT: addq $248, %rsp
diff --git a/llvm/test/CodeGen/X86/statepoint-vector.ll b/llvm/test/CodeGen/X86/statepoint-vector.ll
index c8ddb89ee569876..f53ac2f8f6279eb 100644
--- a/llvm/test/CodeGen/X86/statepoint-vector.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vector.ll
@@ -122,9 +122,9 @@ define void @test5() gc "statepoint-example" {
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq $-1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $-1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $-1, (%rsp)
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp4:
; CHECK-NEXT: addq $40, %rsp
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 8d3319eb5958830..5b1e0545502b817 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -152,8 +152,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, %edx
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 67d3e5b16e08b57..30a7f80b2315d52 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -193,8 +193,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $32, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, %edx
diff --git a/llvm/test/tools/llvm-lto2/X86/pipeline.ll b/llvm/test/tools/llvm-lto2/X86/pipeline.ll
index 37568dcd0b61890..9896cf4da2a4040 100644
--- a/llvm/test/tools/llvm-lto2/X86/pipeline.ll
+++ b/llvm/test/tools/llvm-lto2/X86/pipeline.ll
@@ -15,7 +15,7 @@
; is accepted).
; RUN: llvm-lto2 run %t1.bc -o %t.o -r %t1.bc,patatino,px
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @patatino() {
diff --git a/llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll b/llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll
index f5cadc127ef844a..6b54981c80e4335 100644
--- a/llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll
+++ b/llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll
@@ -26,7 +26,7 @@
; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1}
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target datalayout = "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo(ptr %a) {
diff --git a/llvm/test/tools/llvm-lto2/X86/stats-file-option.ll b/llvm/test/tools/llvm-lto2/X86/stats-file-option.ll
index 7daedd701781770..99c248682735e28 100644
--- a/llvm/test/tools/llvm-lto2/X86/stats-file-option.ll
+++ b/llvm/test/tools/llvm-lto2/X86/stats-file-option.ll
@@ -6,7 +6,7 @@
; RUN: llvm-lto2 run %t1.bc -o %t.o -r %t1.bc,patatino,px -stats-file=%t2.stats
; RUN: FileCheck --input-file=%t2.stats %s
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @patatino() {
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index 63878995ef8115e..5d798daf6627177 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -19,14 +19,16 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
"x86_64-unknown-linux-gnu");
std::string DL2 = UpgradeDataLayoutString(
"e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32", "i686-pc-windows-msvc");
- std::string DL3 = UpgradeDataLayoutString("e-m:o-i64:64-i128:128-n32:64-S128",
- "x86_64-apple-macosx");
- EXPECT_EQ(DL1, "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64"
- "-f80:128-n8:16:32:64-S128");
- EXPECT_EQ(DL2, "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64"
- "-f80:128-n8:16:32-S32");
- EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128"
- "-n32:64-S128");
+ std::string DL3 = UpgradeDataLayoutString(
+ "e-m:o-i64:64-f80:128-n8:16:32:64-S128", "x86_64-apple-macosx");
+ EXPECT_EQ(DL1,
+ "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128"
+ "-f80:128-n8:16:32:64-S128");
+ EXPECT_EQ(DL2,
+ "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128"
+ "-f80:128-n8:16:32-S32");
+ EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:"
+ "128-n8:16:32:64-S128");
// Check that AMDGPU targets add -G1 if it's not present.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32", "r600"), "e-p:32:32-G1");
@@ -58,21 +60,22 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) {
std::string DL1 = UpgradeDataLayoutString(
- "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-"
+ "f32:32:32"
"-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
"-n8:16:32:64-S128",
"x86_64-unknown-linux-gnu");
- std::string DL2 = UpgradeDataLayoutString("e-p:32:32", "i686-apple-darwin9");
- std::string DL3 = UpgradeDataLayoutString("e-m:e-i64:64-n32:64",
+ std::string DL2 = UpgradeDataLayoutString("e-m:e-i64:64-n32:64",
"powerpc64le-unknown-linux-gnu");
- std::string DL4 =
+ std::string DL3 =
UpgradeDataLayoutString("e-m:o-i64:64-i128:128-n32:64-S128", "aarch64--");
- EXPECT_EQ(DL1, "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
- "-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64"
- "-f80:128:128-n8:16:32:64-S128");
- EXPECT_EQ(DL2, "e-p:32:32");
- EXPECT_EQ(DL3, "e-m:e-i64:64-n32:64");
- EXPECT_EQ(DL4, "e-m:o-i64:64-i128:128-n32:64-S128");
+ EXPECT_EQ(
+ DL1,
+ "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128"
+ "-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64"
+ "-f80:128:128-n8:16:32:64-S128");
+ EXPECT_EQ(DL2, "e-m:e-i64:64-n32:64");
+ EXPECT_EQ(DL3, "e-m:o-i64:64-i128:128-n32:64-S128");
// Check that AMDGPU targets don't add -G1 if there is already a -G flag.
EXPECT_EQ(UpgradeDataLayoutString("e-p:32:32-G2", "r600"), "e-p:32:32-G2");
diff --git a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp
index c0272c337dfef6d..5a050bc520362c2 100644
--- a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp
+++ b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp
@@ -70,8 +70,8 @@ class InstrRefLDVTest : public testing::Test {
void SetUp() {
// Boilerplate that creates a MachineFunction and associated blocks.
- Mod->setDataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-"
- "n8:16:32:64-S128");
+ Mod->setDataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-"
+ "f80:128-n8:16:32:64-S128");
Triple TargetTriple("x86_64--");
std::string Error;
const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
@@ -475,8 +475,8 @@ body: |
auto MIRParse = createMIRParser(std::move(MemBuf), Ctx);
Mod = MIRParse->parseIRModule();
assert(Mod);
- Mod->setDataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-"
- "n8:16:32:64-S128");
+ Mod->setDataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-"
+ "f80:128-n8:16:32:64-S128");
bool Result = MIRParse->parseMachineFunctions(*Mod, *MMI);
assert(!Result && "Failed to parse unit test machine function?");
More information about the cfe-commits
mailing list