[llvm] [DAG] visitEXTRACT_VECTOR_ELT - constant fold legal fp imm values (PR #74304)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 01:39:27 PST 2023
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/74304
From 25dd56167af4a27fc4d68d432f7a96001c7b63fd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 4 Dec 2023 11:36:31 +0000
Subject: [PATCH] [DAG] visitEXTRACT_VECTOR_ELT - constant fold legal fp imm
values
If we're extracting a constant floating-point value and the constant is a legal fp immediate for the target, replace the extraction with an fp constant node.
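For illustration, a minimal hypothetical case (not one of the updated tests below; in practice the fold mostly fires on patterns that only become visible after legalization, e.g. through bitcasts): once computeKnownBits proves the demanded lane is a single constant bit pattern, the extract is replaced by a ConstantFP node, so on AArch64 a function like this can select to an fmov immediate instead of a vector lane move:

    define float @extract_known_lane() {
      ; Lane 3 is provably the bit pattern 0x3f800000 (float 1.0), a legal
      ; fp immediate on AArch64, so the extract folds to ConstantFP 1.0.
      %v = insertelement <4 x float> zeroinitializer, float 1.0, i32 3
      %elt = extractelement <4 x float> %v, i32 3
      ret float %elt
    }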
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 +
.../aarch64-neon-vector-insert-uaddlv.ll | 3 +-
...rm64-promote-const-complex-initializers.ll | 20 +-
.../CodeGen/X86/2011-10-19-widen_vselect.ll | 6 +-
llvm/test/CodeGen/X86/2012-07-10-extload64.ll | 4 +-
llvm/test/CodeGen/X86/fold-load-vec.ll | 2 +-
llvm/test/CodeGen/X86/half.ll | 14 +-
llvm/test/CodeGen/X86/nontemporal-3.ll | 646 +++---------------
llvm/test/CodeGen/X86/pr41619.ll | 3 +-
llvm/test/CodeGen/X86/vec_zero_cse.ll | 4 +-
.../CodeGen/X86/vector-shuffle-combining.ll | 2 +-
11 files changed, 138 insertions(+), 579 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2a3425a42607e..e64a380965d4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22243,6 +22243,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ // See if the extracted element is constant, in which case fold it if it's
+ // a legal fp immediate.
+ if (IndexC && ScalarVT.isFloatingPoint()) {
+ APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
+ KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
+ if (KnownElt.isConstant()) {
+ APFloat CstFP =
+ APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
+ if (TLI.isFPImmLegal(CstFP, ScalarVT))
+ return DAG.getConstantFP(CstFP, DL, ScalarVT);
+ }
+ }
+
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 55750ab34e17a..3c8aca5145261 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -69,11 +69,10 @@ define void @insert_vec_v23i32_uaddlv_from_v8i16(ptr %0) {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: movi.2d v2, #0000000000000000
-; CHECK-NEXT: add x8, x0, #88
+; CHECK-NEXT: str wzr, [x0, #88]
; CHECK-NEXT: uaddlv.8h s1, v0
; CHECK-NEXT: stp q0, q0, [x0, #16]
; CHECK-NEXT: stp q0, q0, [x0, #48]
-; CHECK-NEXT: st1.s { v0 }[2], [x8]
; CHECK-NEXT: str d0, [x0, #80]
; CHECK-NEXT: mov.s v2[0], v1[0]
; CHECK-NEXT: ucvtf.4s v1, v2
diff --git a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
index 86ebf803c5783..bbd2acbab4246 100644
--- a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
@@ -30,10 +30,10 @@ define [1 x <4 x float>] @test1() {
define [1 x <4 x float>] @test2() {
; CHECK-LABEL: .p2align 4, 0x0 ; -- Begin function test2
; CHECK-NEXT: lCPI1_0:
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x3f800000 ; float 1
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0xbf800000 ; float -1
; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions
; CHECK-NEXT: .globl _test2
; CHECK-NEXT: .p2align 2
@@ -43,17 +43,7 @@ define [1 x <4 x float>] @test2() {
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: adrp x8, lCPI1_0@PAGE
; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF]
-; CHECK-NEXT: mov s2, v1[1]
-; CHECK-NEXT: fneg s0, s1
-; CHECK-NEXT: mov s3, v1[2]
-; CHECK-NEXT: mov s1, v1[3]
-; CHECK-NEXT: fneg s2, s2
-; CHECK-NEXT: fneg s3, s3
-; CHECK-NEXT: fneg s1, s1
-; CHECK-NEXT: mov.s v0[1], v2[0]
-; CHECK-NEXT: mov.s v0[2], v3[0]
-; CHECK-NEXT: mov.s v0[3], v1[0]
+; CHECK-NEXT: ldr q0, [x8, lCPI1_0@PAGEOFF]
; CHECK-NEXT: ret
;
%constexpr = fneg float extractelement (<4 x float> bitcast (<1 x i128> <i128 84405977732342157929391748327801880576> to <4 x float>), i32 0)
diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index e7f62b9dfc221..d3f410d37567c 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -49,14 +49,12 @@ entry:
define void @zero_test() {
; X86-LABEL: zero_test:
; X86: # %bb.0: # %entry
-; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: zero_test:
; X64: # %bb.0: # %entry
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: movlps %xmm0, (%rax)
+; X64-NEXT: movq $0, (%rax)
; X64-NEXT: retq
entry:
%0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
index b6ec3b34eb107..f4ec8bde3700a 100644
--- a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -29,8 +29,8 @@ define void @store_64(ptr %ptr) {
; X86-LABEL: store_64:
; X86: # %bb.0: # %BB
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: movl $0, 4(%eax)
+; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: store_64:
diff --git a/llvm/test/CodeGen/X86/fold-load-vec.ll b/llvm/test/CodeGen/X86/fold-load-vec.ll
index 348929cdf9f79..0bf846a0930bb 100644
--- a/llvm/test/CodeGen/X86/fold-load-vec.ll
+++ b/llvm/test/CodeGen/X86/fold-load-vec.ll
@@ -10,8 +10,8 @@ define void @sample_test(ptr %source, ptr %dest) nounwind {
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movlps %xmm0, (%rsp)
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movlps %xmm0, (%rsp)
; CHECK-NEXT: movlps %xmm0, (%rsi)
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 596e465ee8cac..7225257203161 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -1082,12 +1082,11 @@ define void @main.158() #0 {
; BWON-F16C-LABEL: main.158:
; BWON-F16C: # %bb.0: # %entry
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
-; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
-; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1
+; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
+; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2
; BWON-F16C-NEXT: jae .LBB20_2
; BWON-F16C-NEXT: # %bb.1: # %entry
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -1100,8 +1099,7 @@ define void @main.158() #0 {
; CHECK-I686-LABEL: main.158:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
-; CHECK-I686-NEXT: pxor %xmm0, %xmm0
-; CHECK-I686-NEXT: movd %xmm0, (%esp)
+; CHECK-I686-NEXT: movl $0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
diff --git a/llvm/test/CodeGen/X86/nontemporal-3.ll b/llvm/test/CodeGen/X86/nontemporal-3.ll
index a2d2c5ca43011..f9872b10097a1 100644
--- a/llvm/test/CodeGen/X86/nontemporal-3.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-3.ll
@@ -93,247 +93,66 @@ define void @test_zero_v4f64_align1(ptr %dst) nounwind {
}
define void @test_zero_v8f32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8f32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8f32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8f32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <8 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v4i64_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v4i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v4i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v4i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v4i64_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v4i64_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v4i64_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <4 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v8i32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8i32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8i32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8i32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <8 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v16i16_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16i16_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16i16_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16i16_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <16 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v32i8_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v32i8_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v32i8_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v32i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: retq
store <32 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
@@ -508,347 +327,86 @@ define void @test_zero_v8f64_align1(ptr %dst) nounwind {
}
define void @test_zero_v16f32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: movntiq %rax, 40(%rdi)
-; SSE4A-NEXT: movntiq %rax, 56(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16f32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16f32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16f32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <16 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v8i64_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v8i64_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v8i64_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v8i64_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <8 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v16i32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v16i32_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v16i32_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v16i32_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <16 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v32i16_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v32i16_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v32i16_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v32i16_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <32 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
define void @test_zero_v64i8_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v64i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v64i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v64i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: test_zero_v64i8_align1:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: movntiq %rax, 8(%rdi)
-; AVX-NEXT: movntiq %rax, (%rdi)
-; AVX-NEXT: movntiq %rax, 24(%rdi)
-; AVX-NEXT: movntiq %rax, 16(%rdi)
-; AVX-NEXT: movntiq %rax, 40(%rdi)
-; AVX-NEXT: movntiq %rax, 32(%rdi)
-; AVX-NEXT: movntiq %rax, 56(%rdi)
-; AVX-NEXT: movntiq %rax, 48(%rdi)
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: test_zero_v64i8_align1:
-; AVX512: # %bb.0:
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movntiq %rax, 8(%rdi)
-; AVX512-NEXT: movntiq %rax, (%rdi)
-; AVX512-NEXT: movntiq %rax, 24(%rdi)
-; AVX512-NEXT: movntiq %rax, 16(%rdi)
-; AVX512-NEXT: movntiq %rax, 40(%rdi)
-; AVX512-NEXT: movntiq %rax, 32(%rdi)
-; AVX512-NEXT: movntiq %rax, 56(%rdi)
-; AVX512-NEXT: movntiq %rax, 48(%rdi)
-; AVX512-NEXT: retq
+; CHECK-LABEL: test_zero_v64i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movntiq %rax, 8(%rdi)
+; CHECK-NEXT: movntiq %rax, (%rdi)
+; CHECK-NEXT: movntiq %rax, 24(%rdi)
+; CHECK-NEXT: movntiq %rax, 16(%rdi)
+; CHECK-NEXT: movntiq %rax, 40(%rdi)
+; CHECK-NEXT: movntiq %rax, 32(%rdi)
+; CHECK-NEXT: movntiq %rax, 56(%rdi)
+; CHECK-NEXT: movntiq %rax, 48(%rdi)
+; CHECK-NEXT: retq
store <64 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
ret void
}
@@ -1214,3 +772,7 @@ define void @test_zero_v64i8_align32(ptr %dst) nounwind {
}
!1 = !{i32 1}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE2: {{.*}}
+; SSE41: {{.*}}
+; SSE4A: {{.*}}
diff --git a/llvm/test/CodeGen/X86/pr41619.ll b/llvm/test/CodeGen/X86/pr41619.ll
index 7d1d139a38a52..88dcd7798f0c3 100644
--- a/llvm/test/CodeGen/X86/pr41619.ll
+++ b/llvm/test/CodeGen/X86/pr41619.ll
@@ -7,10 +7,9 @@ define void @foo(double %arg) {
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: vmovd %eax, %xmm0
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: movl %eax, (%rax)
-; CHECK-NEXT: vmovlps %xmm1, (%rax)
+; CHECK-NEXT: movq $0, (%rax)
; CHECK-NEXT: retq
bb:
%tmp = bitcast double %arg to i64
diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll
index 99185277ba745..800dd59c26266 100644
--- a/llvm/test/CodeGen/X86/vec_zero_cse.ll
+++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll
@@ -15,8 +15,8 @@ define void @test1() {
; X32: # %bb.0:
; X32-NEXT: movl $0, M1+4
; X32-NEXT: movl $0, M1
-; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: movlps %xmm0, M2
+; X32-NEXT: movl $0, M2+4
+; X32-NEXT: movl $0, M2
; X32-NEXT: retl
;
; X64-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 1b6d2a2c6298e..be4253b6d5d10 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3225,7 +3225,7 @@ define void @PR43024() {
; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}+4(%rip), %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}+12(%rip), %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovss %xmm0, (%rax)
; AVX-NEXT: retq
store <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x0, float 0x0>, ptr undef, align 16