[llvm-commits] [llvm] r172894 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2012-01-11-split-cv.ll test/CodeGen/X86/MergeConsecutiveStores.ll test/CodeGen/X86/avx-load-store.ll test/CodeGen/X86/avx-sext.ll test/CodeGen/X86/fp-load-trunc.ll test/CodeGen/X86/sandybridge-loads.ll test/CodeGen/X86/v8i1-masks.ll test/CodeGen/X86/vec_fpext.ll
Nadav Rotem
nrotem at apple.com
Sat Jan 19 00:38:41 PST 2013
Author: nadav
Date: Sat Jan 19 02:38:41 2013
New Revision: 172894
URL: http://llvm.org/viewvc/llvm-project?rev=172894&view=rev
Log:
On Sandybridge split unaligned 256bit stores into two xmm-sized stores.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/2012-01-11-split-cv.ll
llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
llvm/trunk/test/CodeGen/X86/avx-load-store.ll
llvm/trunk/test/CodeGen/X86/avx-sext.ll
llvm/trunk/test/CodeGen/X86/fp-load-trunc.ll
llvm/trunk/test/CodeGen/X86/sandybridge-loads.ll
llvm/trunk/test/CodeGen/X86/v8i1-masks.ll
llvm/trunk/test/CodeGen/X86/vec_fpext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 19 02:38:41 2013
@@ -16344,12 +16344,15 @@
ISD::LoadExtType Ext = Ld->getExtensionType();
unsigned Alignment = Ld->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
// On Sandybridge unaligned 256bit loads are inefficient.
if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
- !DCI.isBeforeLegalizeOps() && Alignment < 32 &&
- Ext == ISD::NON_EXTLOAD) {
+ !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
unsigned NumElems = RegVT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
SDValue Ptr = Ld->getBasePtr();
SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
@@ -16363,7 +16366,7 @@
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
- Alignment);
+ std::max(Alignment/2U, 1U));
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
@@ -16536,16 +16539,21 @@
DebugLoc dl = St->getDebugLoc();
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
// If we are saving a concatenation of two XMM registers, perform two stores.
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
- StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
- StoredVal.getNumOperands() == 2) {
- SDValue Value0 = StoredVal.getOperand(0);
- SDValue Value1 = StoredVal.getOperand(1);
+ StVT == VT && !IsAligned) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
+ SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
@@ -16553,10 +16561,11 @@
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(),
+ std::max(Alignment/2U, 1U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
Modified: llvm/trunk/test/CodeGen/X86/2012-01-11-split-cv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-01-11-split-cv.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-01-11-split-cv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2012-01-11-split-cv.ll Sat Jan 19 02:38:41 2013
@@ -2,7 +2,7 @@
;CHECK: add18i16
define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
-;CHECK: vmovups
+;CHECK: vmovaps
%b = load <18 x i16>* %bp, align 16
%x = add <18 x i16> zeroinitializer, %b
store <18 x i16> %x, <18 x i16>* %ret, align 16
Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
+++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Sat Jan 19 02:38:41 2013
@@ -42,7 +42,7 @@
; Move the constants using a single vector store.
; CHECK: merge_const_store_vec
-; CHECK: vmovups %ymm0, (%rsi)
+; CHECK: vmovups
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
%1 = icmp sgt i32 %count, 0
Modified: llvm/trunk/test/CodeGen/X86/avx-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-load-store.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-load-store.ll Sat Jan 19 02:38:41 2013
@@ -53,19 +53,24 @@
unreachable
}
-; CHECK: vmovups %ymm
+; CHECK: storev16i16_01
+; CHECK: vextractf128
+; CHECK: vmovaps %xmm
define void @storev16i16_01(<16 x i16> %a) nounwind {
store <16 x i16> %a, <16 x i16>* undef, align 4
unreachable
}
+; CHECK: storev32i8
; CHECK: vmovaps %ymm
define void @storev32i8(<32 x i8> %a) nounwind {
store <32 x i8> %a, <32 x i8>* undef, align 32
unreachable
}
-; CHECK: vmovups %ymm
+; CHECK: storev32i8_01
+; CHECK: vextractf128
+; CHECK: vmovups %xmm
define void @storev32i8_01(<32 x i8> %a) nounwind {
store <32 x i8> %a, <32 x i8>* undef, align 4
unreachable
@@ -76,7 +81,7 @@
; CHECK: _double_save
; CHECK-NOT: vinsertf128 $1
; CHECK-NOT: vinsertf128 $0
-; CHECK: vmovaps %xmm
+; CHECK: vmovups %xmm
; CHECK: vmovaps %xmm
define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
entry:
Modified: llvm/trunk/test/CodeGen/X86/avx-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-sext.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-sext.ll Sat Jan 19 02:38:41 2013
@@ -186,18 +186,6 @@
ret void
}
-; AVX: sext_5
-; AVX: vpmovsxbw
-; AVX: vpmovsxwd
-; AVX: vpmovsxwd
-; AVX: vpmovsxdq
-; AVX: ret
-define void @sext_5(<8 x i8>* %inbuf, <8 x i64>* %outbuf) {
- %v0 = load <8 x i8>* %inbuf
- %r = sext <8 x i8> %v0 to <8 x i64>
- store <8 x i64> %r, <8 x i64>* %outbuf
- ret void
-}
; AVX: sext_6
; AVX: vpmovsxbw
; AVX: vpmovsxwd
Modified: llvm/trunk/test/CodeGen/X86/fp-load-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-load-trunc.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-load-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-load-trunc.ll Sat Jan 19 02:38:41 2013
@@ -49,8 +49,8 @@
; CHECK: movlhps
; CHECK: ret
; AVX: test4
-; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
-; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: vcvtpd2psy
+; AVX: vcvtpd2psy
; AVX: vinsertf128
; AVX: ret
%x = load <8 x double>* %p
Modified: llvm/trunk/test/CodeGen/X86/sandybridge-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sandybridge-loads.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sandybridge-loads.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sandybridge-loads.ll Sat Jan 19 02:38:41 2013
@@ -3,7 +3,7 @@
;CHECK: wideloads
;CHECK: vmovaps
;CHECK: vinsertf128
-;CHECK: vmovups
+;CHECK: vmovaps
;CHECK-NOT: vinsertf128
;CHECK: ret
@@ -11,11 +11,29 @@
%v0 = load <8 x float>* %a, align 16 ; <---- unaligned!
%v1 = load <8 x float>* %b, align 32 ; <---- aligned!
%m0 = fcmp olt <8 x float> %v1, %v0
- %v2 = load <8 x float>* %c, align 16
+ %v2 = load <8 x float>* %c, align 32 ; <---- aligned!
%m1 = fcmp olt <8 x float> %v2, %v0
%mand = and <8 x i1> %m1, %m0
%r = zext <8 x i1> %mand to <8 x i32>
- store <8 x i32> %r, <8 x i32>* undef, align 16
+ store <8 x i32> %r, <8 x i32>* undef, align 32
+ ret void
+}
+
+; CHECK: widestores
+; loads:
+; CHECK: vmovaps
+; CHECK: vmovaps
+; stores:
+; CHECK: vmovaps
+; CHECK: vextractf128
+; CHECK: vmovaps
+;CHECK: ret
+
+define void @widestores(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 32
+ %v1 = load <8 x float>* %b, align 32
+ store <8 x float> %v0, <8 x float>* %b, align 32 ; <--- aligned
+ store <8 x float> %v1, <8 x float>* %a, align 16 ; <--- unaligned
ret void
}
Modified: llvm/trunk/test/CodeGen/X86/v8i1-masks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/v8i1-masks.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/v8i1-masks.ll (original)
+++ llvm/trunk/test/CodeGen/X86/v8i1-masks.ll Sat Jan 19 02:38:41 2013
@@ -6,7 +6,7 @@
;CHECK: vcmpltp
;CHECK: vandps
;CHECK: vandps
-;CHECK: vmovups
+;CHECK: vmovaps
;CHECK: ret
define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
@@ -17,7 +17,7 @@
%m1 = fcmp olt <8 x float> %v2, %v0
%mand = and <8 x i1> %m1, %m0
%r = zext <8 x i1> %mand to <8 x i32>
- store <8 x i32> %r, <8 x i32>* undef, align 16
+ store <8 x i32> %r, <8 x i32>* undef, align 32
ret void
}
@@ -25,7 +25,7 @@
;CHECK: vcmpltps
;CHECK: vxorps
;CHECK: vandps
-;CHECK: vmovups
+;CHECK: vmovaps
;CHECK: ret
define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
%v0 = load <8 x float>* %a, align 16
@@ -33,7 +33,7 @@
%m0 = fcmp olt <8 x float> %v1, %v0
%mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%r = zext <8 x i1> %mand to <8 x i32>
- store <8 x i32> %r, <8 x i32>* undef, align 16
+ store <8 x i32> %r, <8 x i32>* undef, align 32
ret void
}
Modified: llvm/trunk/test/CodeGen/X86/vec_fpext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fpext.ll?rev=172894&r1=172893&r2=172894&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fpext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fpext.ll Sat Jan 19 02:38:41 2013
@@ -29,8 +29,8 @@
; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
-; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
%0 = load <8 x float>* %in
%1 = fpext <8 x float> %0 to <8 x double>
store <8 x double> %1, <8 x double>* %out, align 1
More information about the llvm-commits
mailing list