[llvm-commits] [llvm] r167967 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2008-10-27-StackRealignment.ll test/CodeGen/X86/memcpy-2.ll test/CodeGen/X86/memset-sse-stack-realignment.ll test/CodeGen/X86/memset64-on-x86-32.ll
Benjamin Kramer
benny.kra at googlemail.com
Wed Nov 14 12:08:40 PST 2012
Author: d0k
Date: Wed Nov 14 14:08:40 2012
New Revision: 167967
URL: http://llvm.org/viewvc/llvm-project?rev=167967&view=rev
Log:
X86: Enable SSE memory intrinsics even when stack alignment is less than 16 bytes.
The stack realignment code was fixed to work when there is stack realignment and
a dynamic alloca is present so this shouldn't cause correctness issues anymore.
Note that this also enables generation of AVX instructions for memset
under the assumptions:
- Unaligned loads/stores are always fast on CPUs supporting AVX
- AVX is not slower than SSE
We may need some tweaked heuristics if one of those assumptions turns out not to
be true.
Effectively reverts r58317. Part of PR2962.
Added:
llvm/trunk/test/CodeGen/X86/memset-sse-stack-realignment.ll
Removed:
llvm/trunk/test/CodeGen/X86/2008-10-27-StackRealignment.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/memcpy-2.ll
llvm/trunk/test/CodeGen/X86/memset64-on-x86-32.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=167967&r1=167966&r2=167967&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Nov 14 14:08:40 2012
@@ -1362,18 +1362,14 @@
bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
- // linux. This is because the stack realignment code can't handle certain
- // cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
!F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
- (SrcAlign == 0 || SrcAlign >= 16))) &&
- Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->getStackAlignment() >= 32) {
+ (SrcAlign == 0 || SrcAlign >= 16)))) {
+ if (Size >= 32) {
if (Subtarget->hasAVX2())
return MVT::v8i32;
if (Subtarget->hasAVX())
@@ -1385,7 +1381,6 @@
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
- Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
Removed: llvm/trunk/test/CodeGen/X86/2008-10-27-StackRealignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-10-27-StackRealignment.ll?rev=167966&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-10-27-StackRealignment.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-10-27-StackRealignment.ll (removed)
@@ -1,22 +0,0 @@
-; Linux doesn't support stack realignment for functions with allocas (PR2888).
-; Until it does, we shouldn't use movaps to access the stack. On targets with
-; sufficiently aligned stack (e.g. darwin) we should.
-; PR8969 - make 32-bit linux have a 16-byte aligned stack
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
-
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-define void @foo(i32 %t) nounwind {
- %tmp1210 = alloca i8, i32 32, align 4
- call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
- %x = alloca i8, i32 %t
- call void @dummy(i8* %x)
- ret void
-}
-
-declare void @dummy(i8*)
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
Modified: llvm/trunk/test/CodeGen/X86/memcpy-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy-2.ll?rev=167967&r1=167966&r2=167967&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcpy-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memcpy-2.ll Wed Nov 14 14:08:40 2012
@@ -1,4 +1,5 @@
; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mattr=+sse2 -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2
; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1
; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
Added: llvm/trunk/test/CodeGen/X86/memset-sse-stack-realignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset-sse-stack-realignment.ll?rev=167967&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset-sse-stack-realignment.ll (added)
+++ llvm/trunk/test/CodeGen/X86/memset-sse-stack-realignment.ll Wed Nov 14 14:08:40 2012
@@ -0,0 +1,77 @@
+; Make sure that we realign the stack. Mingw32 uses 4 byte stack alignment, we
+; need 16 bytes for SSE and 32 bytes for AVX.
+
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s -check-prefix=NOSSE
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s -check-prefix=SSE1
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX1
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
+
+define void @test1(i32 %t) nounwind {
+ %tmp1210 = alloca i8, i32 32, align 4
+ call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
+ %x = alloca i8, i32 %t
+ call void @dummy(i8* %x)
+ ret void
+
+; NOSSE: test1:
+; NOSSE-NOT: and
+; NOSSE: movl $0
+
+; SSE1: test1:
+; SSE1: andl $-16
+; SSE1: movl %esp, %esi
+; SSE1: movaps
+
+; SSE2: test1:
+; SSE2: andl $-16
+; SSE2: movl %esp, %esi
+; SSE2: movaps
+
+; AVX1: test1:
+; AVX1: andl $-32
+; AVX1: movl %esp, %esi
+; AVX1: vmovaps %ymm
+
+; AVX2: test1:
+; AVX2: andl $-32
+; AVX2: movl %esp, %esi
+; AVX2: vmovaps %ymm
+
+}
+
+define void @test2(i32 %t) nounwind {
+ %tmp1210 = alloca i8, i32 16, align 4
+ call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 16, i32 4, i1 false)
+ %x = alloca i8, i32 %t
+ call void @dummy(i8* %x)
+ ret void
+
+; NOSSE: test2:
+; NOSSE-NOT: and
+; NOSSE: movl $0
+
+; SSE1: test2:
+; SSE1: andl $-16
+; SSE1: movl %esp, %esi
+; SSE1: movaps
+
+; SSE2: test2:
+; SSE2: andl $-16
+; SSE2: movl %esp, %esi
+; SSE2: movaps
+
+; AVX1: test2:
+; AVX1: andl $-16
+; AVX1: movl %esp, %esi
+; AVX1: vmovaps %xmm
+
+; AVX2: test2:
+; AVX2: andl $-16
+; AVX2: movl %esp, %esi
+; AVX2: vmovaps %xmm
+}
+
+declare void @dummy(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
Modified: llvm/trunk/test/CodeGen/X86/memset64-on-x86-32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset64-on-x86-32.ll?rev=167967&r1=167966&r2=167967&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset64-on-x86-32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memset64-on-x86-32.ll Wed Nov 14 14:08:40 2012
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core2 | grep movl | count 20
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10
define void @bork() nounwind {
More information about the llvm-commits
mailing list