[llvm] r302989 - [x86, SSE] AVX1 PR28129 (256-bit all-ones rematerialization)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat May 13 06:42:36 PDT 2017
Author: rksimon
Date: Sat May 13 08:42:35 2017
New Revision: 302989
URL: http://llvm.org/viewvc/llvm-project?rev=302989&view=rev
Log:
[x86, SSE] AVX1 PR28129 (256-bit all-ones rematerialization)
Further performance tests on Jaguar indicate that the sequence:
  vxorps %ymm0, %ymm0, %ymm0
  vcmpps $15, %ymm0, %ymm0, %ymm0
is consistently faster (by about 9%) than:
  vpcmpeqd %xmm0, %xmm0, %xmm0
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
Testing the equivalent code on a Sandy Bridge (E5-2640) shows it to be slightly (~3%) faster there as well.
Committed on behalf of @dtemirbulatov
Differential Revision: https://reviews.llvm.org/D32416
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/all-ones-vector.ll
llvm/trunk/test/CodeGen/X86/avx-basic.ll
llvm/trunk/test/CodeGen/X86/avx-cvt-3.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/pr28129.ll
llvm/trunk/test/CodeGen/X86/vector-pcmp.ll
llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat May 13 08:42:35 2017
@@ -7627,6 +7627,13 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
+ case X86::AVX1_SETALLONES: {
+ unsigned Reg = MIB->getOperand(0).getReg();
+ // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
+ MIB->setDesc(get(X86::VCMPPSYrri));
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
+ return true;
+ }
case X86::AVX512_512_SETALLONES: {
unsigned Reg = MIB->getOperand(0).getReg();
MIB->setDesc(get(X86::VPTERNLOGDZrri));
@@ -8515,6 +8522,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
Alignment = 64;
break;
case X86::AVX2_SETALLONES:
+ case X86::AVX1_SETALLONES:
case X86::AVX_SET0:
case X86::AVX512_256_SET0:
Alignment = 32;
@@ -8560,6 +8568,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX2_SETALLONES:
+ case X86::AVX1_SETALLONES:
case X86::AVX_SET0:
case X86::AVX512_128_SET0:
case X86::AVX512_256_SET0:
@@ -8601,13 +8610,14 @@ MachineInstr *X86InstrInfo::foldMemoryOp
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
- Opc == X86::AVX512_256_SET0)
+ Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
- Opc == X86::AVX512_512_SETALLONES);
+ Opc == X86::AVX512_512_SETALLONES ||
+ Opc == X86::AVX1_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat May 13 08:42:35 2017
@@ -486,6 +486,10 @@ let isReMaterializable = 1, isAsCheapAsA
isPseudo = 1, SchedRW = [WriteZero] in {
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
+ let Predicates = [HasAVX1Only, OptForMinSize] in {
+ def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8i32 immAllOnesV))]>;
+ }
let Predicates = [HasAVX2] in
def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllOnesV))]>;
@@ -7755,14 +7759,12 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrc
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
-
-// Without AVX2 we need to concat two v4i32 V_SETALLONES to create a 256-bit
-// all ones value.
-let Predicates = [HasAVX1Only] in
-def : Pat<(v8i32 immAllOnesV),
- (VINSERTF128rr
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), (V_SETALLONES), sub_xmm),
- (V_SETALLONES), 1)>;
+// To create a 256-bit all ones value, we should produce VCMPTRUEPS
+// with YMM register containing zero.
+// FIXME: Avoid producing vxorps to clear the fake inputs.
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
+}
multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
PatFrag memop_frag> {
Modified: llvm/trunk/test/CodeGen/X86/all-ones-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/all-ones-vector.ll?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/all-ones-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/all-ones-vector.ll Sat May 13 08:42:35 2017
@@ -157,8 +157,8 @@ define <32 x i8> @allones_v32i8() nounwi
;
; X32-AVX1-LABEL: allones_v32i8:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v32i8:
@@ -174,8 +174,8 @@ define <32 x i8> @allones_v32i8() nounwi
;
; X64-AVX1-LABEL: allones_v32i8:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v32i8:
@@ -194,8 +194,8 @@ define <16 x i16> @allones_v16i16() noun
;
; X32-AVX1-LABEL: allones_v16i16:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v16i16:
@@ -211,8 +211,8 @@ define <16 x i16> @allones_v16i16() noun
;
; X64-AVX1-LABEL: allones_v16i16:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v16i16:
@@ -231,8 +231,8 @@ define <8 x i32> @allones_v8i32() nounwi
;
; X32-AVX1-LABEL: allones_v8i32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v8i32:
@@ -248,8 +248,8 @@ define <8 x i32> @allones_v8i32() nounwi
;
; X64-AVX1-LABEL: allones_v8i32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v8i32:
@@ -268,8 +268,8 @@ define <4 x i64> @allones_v4i64() nounwi
;
; X32-AVX1-LABEL: allones_v4i64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v4i64:
@@ -285,8 +285,8 @@ define <4 x i64> @allones_v4i64() nounwi
;
; X64-AVX1-LABEL: allones_v4i64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v4i64:
@@ -305,8 +305,8 @@ define <4 x double> @allones_v4f64() nou
;
; X32-AVX1-LABEL: allones_v4f64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v4f64:
@@ -322,8 +322,8 @@ define <4 x double> @allones_v4f64() nou
;
; X64-AVX1-LABEL: allones_v4f64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v4f64:
@@ -342,8 +342,8 @@ define <4 x double> @allones_v4f64_optsi
;
; X32-AVX1-LABEL: allones_v4f64_optsize:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v4f64_optsize:
@@ -359,8 +359,8 @@ define <4 x double> @allones_v4f64_optsi
;
; X64-AVX1-LABEL: allones_v4f64_optsize:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v4f64_optsize:
@@ -379,8 +379,8 @@ define <8 x float> @allones_v8f32() noun
;
; X32-AVX1-LABEL: allones_v8f32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v8f32:
@@ -396,8 +396,8 @@ define <8 x float> @allones_v8f32() noun
;
; X64-AVX1-LABEL: allones_v8f32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v8f32:
@@ -416,8 +416,8 @@ define <8 x float> @allones_v8f32_optsiz
;
; X32-AVX1-LABEL: allones_v8f32_optsize:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX256-LABEL: allones_v8f32_optsize:
@@ -433,8 +433,8 @@ define <8 x float> @allones_v8f32_optsiz
;
; X64-AVX1-LABEL: allones_v8f32_optsize:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX256-LABEL: allones_v8f32_optsize:
@@ -455,8 +455,8 @@ define <64 x i8> @allones_v64i8() nounwi
;
; X32-AVX1-LABEL: allones_v64i8:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
@@ -487,8 +487,8 @@ define <64 x i8> @allones_v64i8() nounwi
;
; X64-AVX1-LABEL: allones_v64i8:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
@@ -522,8 +522,8 @@ define <32 x i16> @allones_v32i16() noun
;
; X32-AVX1-LABEL: allones_v32i16:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
@@ -554,8 +554,8 @@ define <32 x i16> @allones_v32i16() noun
;
; X64-AVX1-LABEL: allones_v32i16:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
@@ -589,8 +589,8 @@ define <16 x i32> @allones_v16i32() noun
;
; X32-AVX1-LABEL: allones_v16i32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
@@ -615,8 +615,8 @@ define <16 x i32> @allones_v16i32() noun
;
; X64-AVX1-LABEL: allones_v16i32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
@@ -644,8 +644,8 @@ define <8 x i64> @allones_v8i64() nounwi
;
; X32-AVX1-LABEL: allones_v8i64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
@@ -670,8 +670,8 @@ define <8 x i64> @allones_v8i64() nounwi
;
; X64-AVX1-LABEL: allones_v8i64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
@@ -699,8 +699,8 @@ define <8 x double> @allones_v8f64() nou
;
; X32-AVX1-LABEL: allones_v8f64:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
@@ -725,8 +725,8 @@ define <8 x double> @allones_v8f64() nou
;
; X64-AVX1-LABEL: allones_v8f64:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
@@ -754,8 +754,8 @@ define <16 x float> @allones_v16f32() no
;
; X32-AVX1-LABEL: allones_v16f32:
; X32-AVX1: # BB#0:
-; X32-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
;
@@ -780,8 +780,8 @@ define <16 x float> @allones_v16f32() no
;
; X64-AVX1-LABEL: allones_v16f32:
; X64-AVX1: # BB#0:
-; X64-AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/avx-basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-basic.ll?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-basic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-basic.ll Sat May 13 08:42:35 2017
@@ -34,8 +34,8 @@ define void @zero256() nounwind ssp {
define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones:
; CHECK: ## BB#0: ## %allocas
-; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -51,8 +51,8 @@ float>* %ptr2vec615, align 32
define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones2:
; CHECK: ## BB#0: ## %allocas
-; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx-cvt-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-cvt-3.ll?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-cvt-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-cvt-3.ll Sat May 13 08:42:35 2017
@@ -48,16 +48,16 @@ define <8 x float> @sitofp_shuffle_zero_
define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_allbits_v8i32:
; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_insert_allbits_v8i32:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
@@ -72,16 +72,16 @@ define <8 x float> @sitofp_insert_allbit
define <8 x float> @sitofp_shuffle_allbits_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_shuffle_allbits_v8i32:
; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_shuffle_allbits_v8i32:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
@@ -95,8 +95,7 @@ define <8 x float> @sitofp_insert_consta
; X86: # BB#0:
; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: movl $2, %eax
@@ -111,8 +110,7 @@ define <8 x float> @sitofp_insert_consta
; X64: # BB#0:
; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: movl $2, %eax
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Sat May 13 08:42:35 2017
@@ -99,16 +99,16 @@ define <8 x float> @test_mm256_and_ps(<8
define <4 x double> @test_mm256_andnot_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_andnot_pd:
; X32: # BB#0:
-; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; X32-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
+; X32-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X32-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
; X32-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X32-NEXT: vandps %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_andnot_pd:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; X64-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
+; X64-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; X64-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
; X64-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X64-NEXT: vandps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/pr28129.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr28129.ll?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr28129.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr28129.ll Sat May 13 08:42:35 2017
@@ -5,15 +5,15 @@
define <4 x double> @cmp4f64_domain(<4 x double> %a) {
; X86-LABEL: cmp4f64_domain:
; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp4f64_domain:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%cmp = fcmp oeq <4 x double> zeroinitializer, zeroinitializer
@@ -26,15 +26,15 @@ define <4 x double> @cmp4f64_domain(<4 x
define <4 x double> @cmp4f64_domain_optsize(<4 x double> %a) optsize {
; X86-LABEL: cmp4f64_domain_optsize:
; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp4f64_domain_optsize:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%cmp = fcmp oeq <4 x double> zeroinitializer, zeroinitializer
@@ -47,15 +47,15 @@ define <4 x double> @cmp4f64_domain_opts
define <8 x float> @cmp8f32_domain(<8 x float> %a) {
; X86-LABEL: cmp8f32_domain:
; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp8f32_domain:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%cmp = fcmp oeq <8 x float> zeroinitializer, zeroinitializer
@@ -68,15 +68,15 @@ define <8 x float> @cmp8f32_domain(<8 x
define <8 x float> @cmp8f32_domain_optsize(<8 x float> %a) optsize {
; X86-LABEL: cmp8f32_domain_optsize:
; X86: # BB#0:
-; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X86-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: cmp8f32_domain_optsize:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; X64-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%cmp = fcmp oeq <8 x float> zeroinitializer, zeroinitializer
Modified: llvm/trunk/test/CodeGen/X86/vector-pcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-pcmp.ll?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-pcmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-pcmp.ll Sat May 13 08:42:35 2017
@@ -148,8 +148,8 @@ define <32 x i8> @test_pcmpgtb_256(<32 x
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -177,8 +177,8 @@ define <16 x i16> @test_pcmpgtw_256(<16
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -206,8 +206,8 @@ define <8 x i32> @test_pcmpgtd_256(<8 x
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -247,8 +247,8 @@ define <4 x i64> @test_pcmpgtq_256(<4 x
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll?rev=302989&r1=302988&r2=302989&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll Sat May 13 08:42:35 2017
@@ -499,8 +499,8 @@ declare <2 x i64> @llvm.x86.xop.vpcmov(<
define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_cmov_si256:
; X32: # BB#0:
-; X32-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; X32-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; X32-NEXT: vxorps %ymm3, %ymm3, %ymm3
+; X32-NEXT: vcmptrueps %ymm3, %ymm3, %ymm3
; X32-NEXT: vxorps %ymm3, %ymm2, %ymm3
; X32-NEXT: vandps %ymm2, %ymm0, %ymm0
; X32-NEXT: vandps %ymm3, %ymm1, %ymm1
@@ -509,8 +509,8 @@ define <4 x i64> @test_mm256_cmov_si256(
;
; X64-LABEL: test_mm256_cmov_si256:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; X64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; X64-NEXT: vxorps %ymm3, %ymm3, %ymm3
+; X64-NEXT: vcmptrueps %ymm3, %ymm3, %ymm3
; X64-NEXT: vxorps %ymm3, %ymm2, %ymm3
; X64-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64-NEXT: vandps %ymm3, %ymm1, %ymm1
More information about the llvm-commits
mailing list