[llvm] [DAGCombiner][X86][WIP] Combine (build_vector (load X))->(vecty (load X)) (PR #88753)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 15 09:08:37 PDT 2024
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/88753
This is an alternative to #88261.
This gets the mmx cases from that patch without affecting RISC-V.
The code here is largely lifted from the (bitcast (load))->(load) code.
>From c1570e959de48e876202d8a6e679abd20a554176 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 15 Apr 2024 09:04:52 -0700
Subject: [PATCH] [DAGCombiner][X86][WIP] Combine (build_vector (load
X))->(vecty (load X))
This is an alternative to #88261.
This gets the mmx cases from that patch without affecting RISC-V.
The code here is largely lifted from the (bitcast (load))->(load) code.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 +
llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll | 15 +-
llvm/test/CodeGen/X86/mmx-intrinsics.ll | 1040 ++++-------------
llvm/test/CodeGen/X86/pr35982.ll | 4 +-
4 files changed, 251 insertions(+), 834 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dbbe8d9193c25c..beab5a052a048b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23501,6 +23501,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
}
+ if (N->getNumOperands() == 1) {
+ SDValue N0 = N->getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
+ TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand())) {
+ SDValue Load =
+ DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 69f733461efc77..3ced6c9483a6c8 100644
--- a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -6,22 +6,11 @@ define void @test(<1 x i64> %c64, <1 x i64> %mask1, ptr %P) {
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: subl $16, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: .cfi_offset %edi, -8
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, (%esp)
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
-; CHECK-NEXT: movq (%esp), %mm0
+; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm1
-; CHECK-NEXT: maskmovq %mm0, %mm1
-; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: maskmovq %mm1, %mm0
; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl
entry:
diff --git a/llvm/test/CodeGen/X86/mmx-intrinsics.ll b/llvm/test/CodeGen/X86/mmx-intrinsics.ll
index a43d9400cde6c8..a7917d58cde5d7 100644
--- a/llvm/test/CodeGen/X86/mmx-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/mmx-intrinsics.ll
@@ -12,17 +12,9 @@ define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: phaddw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: phaddw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -57,17 +49,9 @@ define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpgtd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpgtd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -102,17 +86,9 @@ define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpgtw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpgtw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -147,17 +123,9 @@ define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpgtb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpgtb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -192,17 +160,9 @@ define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpeqd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpeqd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -237,17 +197,9 @@ define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpeqw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpeqw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -282,17 +234,9 @@ define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpeqb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpeqb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -327,17 +271,9 @@ define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckldq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckldq 16(%ebp), %mm0 # mm0 = mm0[0],mem[0]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -372,17 +308,9 @@ define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpcklwd 16(%ebp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -417,17 +345,9 @@ define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpcklbw 16(%ebp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -462,17 +382,9 @@ define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckhdq 16(%ebp), %mm0 # mm0 = mm0[1],mem[1]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -507,17 +419,9 @@ define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckhwd 16(%ebp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -552,17 +456,9 @@ define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckhbw 16(%ebp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -597,17 +493,9 @@ define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: packuswb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: packuswb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -642,17 +530,9 @@ define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: packssdw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: packssdw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -687,17 +567,9 @@ define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: packsswb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: packsswb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -732,12 +604,8 @@ define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrad $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -770,12 +638,8 @@ define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psraw $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -806,12 +670,8 @@ define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -873,12 +733,8 @@ define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrld $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -909,12 +765,8 @@ define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -944,12 +796,8 @@ define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrlw $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1014,12 +862,8 @@ define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: pslld $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1052,12 +896,8 @@ define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psllw $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1088,12 +928,8 @@ define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1123,12 +959,8 @@ define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrad 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1164,12 +996,8 @@ define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psraw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1240,12 +1068,8 @@ define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrld 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1281,12 +1105,8 @@ define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrlw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1357,12 +1177,8 @@ define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: pslld 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1398,12 +1214,8 @@ define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psllw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -1439,17 +1251,9 @@ define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pxor {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pxor 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1484,17 +1288,9 @@ define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: por {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: por 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1529,17 +1325,9 @@ define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pandn {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pandn 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1574,17 +1362,9 @@ define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pand {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pand 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1619,17 +1399,9 @@ define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmullw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1662,17 +1434,9 @@ define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmullw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1707,17 +1471,9 @@ define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmulhw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmulhw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1752,17 +1508,9 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmaddwd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmaddwd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1797,17 +1545,9 @@ define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psubusw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psubusw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1842,17 +1582,9 @@ define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psubusb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psubusb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1887,17 +1619,9 @@ define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psubsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psubsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1932,17 +1656,9 @@ define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psubsb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psubsb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2012,17 +1728,9 @@ define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psubd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psubd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2057,17 +1765,9 @@ define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psubw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psubw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2102,17 +1802,9 @@ define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psubb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psubb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2147,17 +1839,9 @@ define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: paddusw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: paddusw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2192,17 +1876,9 @@ define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: paddusb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: paddusb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2237,17 +1913,9 @@ define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: paddsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: paddsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2282,17 +1950,9 @@ define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: paddsb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: paddsb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2362,17 +2022,9 @@ define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: paddd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: paddd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2407,17 +2059,9 @@ define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: paddw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: paddw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2452,17 +2096,9 @@ define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: paddb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: paddb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2497,17 +2133,9 @@ define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psadbw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psadbw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2540,17 +2168,9 @@ define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pminsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pminsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2585,17 +2205,9 @@ define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pminub {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pminub 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2630,17 +2242,9 @@ define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmaxsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmaxsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2675,17 +2279,9 @@ define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmaxub {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmaxub 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2720,17 +2316,9 @@ define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pavgw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pavgw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2765,17 +2353,9 @@ define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pavgb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pavgb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2829,18 +2409,8 @@ declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test24:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, (%esp)
-; X86-NEXT: movq (%esp), %mm0
+; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pmovmskb %mm0, %eax
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test24:
@@ -2860,26 +2430,12 @@ declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind
define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp {
; X86-LABEL: test23:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %edi
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, (%esp)
-; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: movq (%esp), %mm1
-; X86-NEXT: maskmovq %mm0, %mm1
-; X86-NEXT: leal -4(%ebp), %esp
+; X86-NEXT: movq {{[0-9]+}}(%esp), %mm1
+; X86-NEXT: maskmovq %mm1, %mm0
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test23:
@@ -2906,17 +2462,9 @@ define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmulhuw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmulhuw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2951,12 +2499,8 @@ define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: pshufw $3, 8(%ebp), %mm0 # mm0 = mem[3,0,0,0]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -2983,18 +2527,8 @@ entry:
define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test21_2:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, (%esp)
-; X86-NEXT: pshufw $3, (%esp), %mm0 # mm0 = mem[3,0,0,0]
+; X86-NEXT: pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0]
; X86-NEXT: movd %mm0, %eax
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test21_2:
@@ -3021,17 +2555,9 @@ define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmuludq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmuludq 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3061,17 +2587,7 @@ declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-LABEL: test19:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, (%esp)
-; X86-NEXT: cvtpi2pd (%esp), %xmm0
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: cvtpi2pd {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test19:
@@ -3189,12 +2705,8 @@ define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: pabsd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: pabsd 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3226,12 +2738,8 @@ define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: pabsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: pabsw 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3263,12 +2771,8 @@ define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: pabsb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: pabsb 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3300,17 +2804,9 @@ define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psignd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psignd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3345,17 +2841,9 @@ define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psignw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psignw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3390,17 +2878,9 @@ define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: psignb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: psignb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3435,17 +2915,9 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pshufb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pshufb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3480,17 +2952,9 @@ define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmulhrsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmulhrsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3525,17 +2989,9 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pmaddubsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pmaddubsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3570,17 +3026,9 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: phsubsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: phsubsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3615,17 +3063,9 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: phsubd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: phsubd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3660,17 +3100,9 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: phsubw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: phsubw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3705,17 +3137,9 @@ define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: phaddsw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: phaddsw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3750,17 +3174,9 @@ define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: phaddd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: phaddd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -3815,12 +3231,8 @@ define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: pinsrw $2, 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -3847,18 +3259,8 @@ declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg)
define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind {
; X86-LABEL: test_mm_extract_pi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, (%esp)
-; X86-NEXT: movq (%esp), %mm0
+; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT: pextrw $2, %mm0, %eax
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: test_mm_extract_pi16:
diff --git a/llvm/test/CodeGen/X86/pr35982.ll b/llvm/test/CodeGen/X86/pr35982.ll
index 4a79a109f8b603..9a700d1475e73f 100644
--- a/llvm/test/CodeGen/X86/pr35982.ll
+++ b/llvm/test/CodeGen/X86/pr35982.ll
@@ -6,8 +6,8 @@ define float @PR35982_emms(<1 x i64>) nounwind {
; NO-POSTRA-LABEL: PR35982_emms:
; NO-POSTRA: # %bb.0:
; NO-POSTRA-NEXT: subl $8, %esp
-; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; NO-POSTRA-NEXT: movd %mm0, %ecx
; NO-POSTRA-NEXT: emms
@@ -50,8 +50,8 @@ define float @PR35982_femms(<1 x i64>) nounwind {
; NO-POSTRA-LABEL: PR35982_femms:
; NO-POSTRA: # %bb.0:
; NO-POSTRA-NEXT: subl $8, %esp
-; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; NO-POSTRA-NEXT: movd %mm0, %ecx
; NO-POSTRA-NEXT: femms
More information about the llvm-commits
mailing list