[llvm] 169db80 - [X86] Canonicalize fp zero vectors from bitcasted integer zero vectors
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 10:34:34 PST 2023
Author: Simon Pilgrim
Date: 2023-11-30T18:33:52Z
New Revision: 169db80e41936811c6744f2c513a1ed00d97f10e
URL: https://github.com/llvm/llvm-project/commit/169db80e41936811c6744f2c513a1ed00d97f10e
DIFF: https://github.com/llvm/llvm-project/commit/169db80e41936811c6744f2c513a1ed00d97f10e.diff
LOG: [X86] Canonicalize fp zero vectors from bitcasted integer zero vectors
Generic DAG combining is supposed to fold these bitcasts directly, but the fold can be blocked by hasOneUse checks.
Noticed while investigating #26392
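For illustration, a minimal IR sketch of the pattern this targets (hypothetical, not taken from the commit's tests): both zeroinitializer uses below share a single integer zero node during selection, so a one-use-guarded generic fold of the bitcast can bail, while the new X86 combine still canonicalizes the result to an fp zero vector.

    define <4 x float> @bitcast_zero_sketch(ptr %p) {
      ; the integer zero also feeds the store, so a hasOneUse check on it
      ; can block the generic bitcast-of-zero fold
      store <4 x i32> zeroinitializer, ptr %p, align 16
      %fp = bitcast <4 x i32> zeroinitializer to <4 x float>
      ret <4 x float> %fp
    }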
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
llvm/test/CodeGen/X86/2012-07-10-extload64.ll
llvm/test/CodeGen/X86/fold-load-vec.ll
llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
llvm/test/CodeGen/X86/half.ll
llvm/test/CodeGen/X86/nontemporal-3.ll
llvm/test/CodeGen/X86/pr13577.ll
llvm/test/CodeGen/X86/pr41619.ll
llvm/test/CodeGen/X86/vec_zero_cse.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6167be7bdf84e9f..b73779edc5f7270 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42930,6 +42930,12 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
}
}
+ // Canonicalize fp zero vectors - these sometimes don't fold due to one use
+ // limits.
+ if (VT.isVector() && TLI.isTypeLegal(VT) && ISD::isBuildVectorAllZeros(N) &&
+ (VT.getScalarType() == MVT::f32 || VT.getScalarType() == MVT::f64))
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N0));
+
// Try to remove a bitcast of constant vXi1 vector. We have to legalize
// most of these to scalar anyway.
if (Subtarget.hasAVX512() && VT.isScalarInteger() &&
diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index e7f62b9dfc22196..d3f410d37567c36 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -49,14 +49,12 @@ entry:
define void @zero_test() {
; X86-LABEL: zero_test:
; X86: # %bb.0: # %entry
-; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: zero_test:
; X64: # %bb.0: # %entry
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: movlps %xmm0, (%rax)
+; X64-NEXT: movq $0, (%rax)
; X64-NEXT: retq
entry:
%0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
index b6ec3b34eb1072d..f4ec8bde3700a9b 100644
--- a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -29,8 +29,8 @@ define void @store_64(ptr %ptr) {
; X86-LABEL: store_64:
; X86: # %bb.0: # %BB
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: movl $0, 4(%eax)
+; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: store_64:
diff --git a/llvm/test/CodeGen/X86/fold-load-vec.ll b/llvm/test/CodeGen/X86/fold-load-vec.ll
index 348929cdf9f79e4..0bf846a0930bb4c 100644
--- a/llvm/test/CodeGen/X86/fold-load-vec.ll
+++ b/llvm/test/CodeGen/X86/fold-load-vec.ll
@@ -10,8 +10,8 @@ define void @sample_test(ptr %source, ptr %dest) nounwind {
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movlps %xmm0, (%rsp)
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movlps %xmm0, (%rsp)
; CHECK-NEXT: movlps %xmm0, (%rsi)
diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 88425ea87845dfe..713ecf5414bac5c 100644
--- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -51,11 +51,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X32-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
-; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
-; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X32-NEXT: mulps %xmm0, %xmm0
-; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X32-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
@@ -64,8 +59,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
; X32-NEXT: cmpunordps %xmm0, %xmm0
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X32-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X32-NEXT: minps %xmm0, %xmm0
; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
@@ -135,11 +132,6 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
-; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT: mulps %xmm0, %xmm0
-; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload
; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
@@ -148,8 +140,10 @@ define void @program_1(ptr %dest, ptr %t0, <4 x float> %p0, <4 x float> %p1, <4
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
; X64-NEXT: cmpunordps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
-; X64-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: minps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
; X64-NEXT: xorl %ebx, %ebx
; X64-NEXT: xorps %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 596e465ee8cacf2..7225257203161b2 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -1082,12 +1082,11 @@ define void @main.158() #0 {
; BWON-F16C-LABEL: main.158:
; BWON-F16C: # %bb.0: # %entry
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
-; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
-; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1
+; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
+; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2
; BWON-F16C-NEXT: jae .LBB20_2
; BWON-F16C-NEXT: # %bb.1: # %entry
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -1100,8 +1099,7 @@ define void @main.158() #0 {
; CHECK-I686-LABEL: main.158:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
-; CHECK-I686-NEXT: pxor %xmm0, %xmm0
-; CHECK-I686-NEXT: movd %xmm0, (%esp)
+; CHECK-I686-NEXT: movl $0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
diff --git a/llvm/test/CodeGen/X86/nontemporal-3.ll b/llvm/test/CodeGen/X86/nontemporal-3.ll
index a2d2c5ca4301186..f9872b10097a150 100644
--- a/llvm/test/CodeGen/X86/nontemporal-3.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-3.ll
@@ -93,247 +93,66 @@ define void @test_zero_v4f64_align1(ptr %dst) nounwind {
}
define void @test_zero_v8f32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8f32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8f32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8f32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <8 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v4i64_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v4i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v4i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v4i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v4i64_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v4i64_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v4i64_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <4 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v8i32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8i32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8i32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8i32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <8 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v16i16_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16i16_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16i16_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16i16_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <16 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v32i8_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v32i8_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v32i8_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v32i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <32 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
@@ -508,347 +327,86 @@ define void @test_zero_v8f64_align1(ptr %dst) nounwind {
}
define void @test_zero_v16f32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: movntiq %rax, 40(%rdi)
-; SSE4A-NEXT: movntiq %rax, 56(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16f32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16f32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16f32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <16 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v8i64_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8i64_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8i64_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8i64_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <8 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v16i32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16i32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16i32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16i32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <16 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v32i16_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v32i16_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v32i16_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v32i16_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <32 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v64i8_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v64i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v64i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v64i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v64i8_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v64i8_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v64i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <64 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
@@ -1214,3 +772,7 @@ define void @test_zero_v64i8_align32(ptr %dst) nounwind {
}
!1 = !{i32 1}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE2: {{.*}}
+; SSE41: {{.*}}
+; SSE4A: {{.*}}
diff --git a/llvm/test/CodeGen/X86/pr13577.ll b/llvm/test/CodeGen/X86/pr13577.ll
index 7511560d85f5191..ef359e740c09d7c 100644
--- a/llvm/test/CodeGen/X86/pr13577.ll
+++ b/llvm/test/CodeGen/X86/pr13577.ll
@@ -29,7 +29,8 @@ declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
define float @pr26070() {
; CHECK-LABEL: pr26070:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = call float @copysignf(float 1.0, float undef) readnone
ret float %c
diff --git a/llvm/test/CodeGen/X86/pr41619.ll b/llvm/test/CodeGen/X86/pr41619.ll
index 7d1d139a38a520d..88dcd7798f0c3d2 100644
--- a/llvm/test/CodeGen/X86/pr41619.ll
+++ b/llvm/test/CodeGen/X86/pr41619.ll
@@ -7,10 +7,9 @@ define void @foo(double %arg) {
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: vmovd %eax, %xmm0
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: movl %eax, (%rax)
-; CHECK-NEXT: vmovlps %xmm1, (%rax)
+; CHECK-NEXT: movq $0, (%rax)
; CHECK-NEXT: retq
bb:
%tmp = bitcast double %arg to i64
diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll
index 99185277ba745d5..800dd59c262666d 100644
--- a/llvm/test/CodeGen/X86/vec_zero_cse.ll
+++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll
@@ -15,8 +15,8 @@ define void @test1() {
; X32: # %bb.0:
; X32-NEXT: movl $0, M1+4
; X32-NEXT: movl $0, M1
-; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: movlps %xmm0, M2
+; X32-NEXT: movl $0, M2+4
+; X32-NEXT: movl $0, M2
; X32-NEXT: retl
;
; X64-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index 23c37af1db2f7c9..eb5fc1523c08a7d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -108,11 +108,10 @@ define void @PR46178(ptr %0) {
; X86-NEXT: vmovdqu (%eax), %ymm1
; X86-NEXT: vpmovqw %ymm0, %xmm0
; X86-NEXT: vpmovqw %ymm1, %xmm1
-; X86-NEXT: vpsllw $8, %xmm0, %xmm0
-; X86-NEXT: vpsraw $8, %xmm0, %xmm0
-; X86-NEXT: vpsllw $8, %xmm1, %xmm1
-; X86-NEXT: vpsraw $8, %xmm1, %xmm1
-; X86-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vpsllw $8, %ymm0, %ymm0
+; X86-NEXT: vpsraw $8, %ymm0, %ymm0
+; X86-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,1]
; X86-NEXT: vmovdqu %ymm0, (%eax)
; X86-NEXT: vzeroupper
; X86-NEXT: retl