[llvm] 99fd62a - [X86] Add tests for constant nontemporal vector stores

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 4 03:11:44 PST 2023


Author: Simon Pilgrim
Date: 2023-12-04T11:11:34Z
New Revision: 99fd62adff24724547f6ba7716cca2e37c640667

URL: https://github.com/llvm/llvm-project/commit/99fd62adff24724547f6ba7716cca2e37c640667
DIFF: https://github.com/llvm/llvm-project/commit/99fd62adff24724547f6ba7716cca2e37c640667.diff

LOG: [X86] Add tests for constant nontemporal vector stores

Extends the zero-vector test coverage in nontemporal-3.ll

Added: 
    llvm/test/CodeGen/X86/nontemporal-4.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/nontemporal-4.ll b/llvm/test/CodeGen/X86/nontemporal-4.ll
new file mode 100644
index 0000000000000..0f42a9a9cb719
--- /dev/null
+++ b/llvm/test/CodeGen/X86/nontemporal-4.ll
@@ -0,0 +1,1749 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE4A
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+
+; Test codegen for under aligned nontemporal vector stores
+
+; XMM versions.
+
+define void @test_constant_v2f64_align1(ptr %dst) nounwind {
+; CHECK-LABEL: test_constant_v2f64_align1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movabsq $4611686018427387904, %rax # imm = 0x4000000000000000
+; CHECK-NEXT:    movntiq %rax, 8(%rdi)
+; CHECK-NEXT:    movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
+; CHECK-NEXT:    movntiq %rax, (%rdi)
+; CHECK-NEXT:    retq
+  store <2 x double> <double 1.0, double 2.0>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v4f32_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v4f32_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v4f32_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v4f32_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v4f32_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v4f32_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    retq
+  store <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v2i64_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v2i64_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl $1, %eax
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v2i64_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    xorl %eax, %eax
+; SSE4A-NEXT:    movntiq %rax, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v2i64_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movl $1, %eax
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    xorl %eax, %eax
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v2i64_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movl $1, %eax
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v2i64_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movl $1, %eax
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    retq
+  store <2 x i64> <i64 0, i64 1>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v4i32_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v4i32_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v4i32_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v4i32_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v4i32_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v4i32_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    retq
+  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8i16_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v8i16_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v8i16_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v8i16_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8i16_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8i16_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    retq
+  store <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16i8_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v16i8_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v16i8_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v16i8_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16i8_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16i8_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    retq
+  store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+; YMM versions.
+
+define void @test_constant_v4f64_align1(ptr %dst) nounwind {
+; CHECK-LABEL: test_constant_v4f64_align1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movabsq $-4616189618054758400, %rax # imm = 0xBFF0000000000000
+; CHECK-NEXT:    movntiq %rax, 8(%rdi)
+; CHECK-NEXT:    movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000
+; CHECK-NEXT:    movntiq %rax, (%rdi)
+; CHECK-NEXT:    movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
+; CHECK-NEXT:    movntiq %rax, 24(%rdi)
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    movntiq %rax, 16(%rdi)
+; CHECK-NEXT:    retq
+  store <4 x double> <double -2.0, double -1.0, double 0.0, double 1.0>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8f32_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v8f32_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v8f32_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v8f32_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8f32_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8f32_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    retq
+  store <8 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v4i64_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v4i64_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq $-1, %rax
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movq $-3, %rax
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movq $-2, %rax
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v4i64_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    xorl %eax, %eax
+; SSE4A-NEXT:    movntiq %rax, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v4i64_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movq $-1, %rax
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movq $-3, %rax
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movq $-2, %rax
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    xorl %eax, %eax
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v4i64_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq $-1, %rax
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movq $-3, %rax
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movq $-2, %rax
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v4i64_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movq $-1, %rax
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movq $-3, %rax
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movq $-2, %rax
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    retq
+  store <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8i32_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v8i32_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v8i32_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v8i32_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8i32_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8i32_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    retq
+  store <8 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16i16_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v16i16_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v16i16_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v16i16_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16i16_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16i16_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    retq
+  store <16 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v32i8_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v32i8_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v32i8_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v32i8_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v32i8_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v32i8_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    retq
+  store <32 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v4f64_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v4f64_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v4f64_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v4f64_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    retq
+  store <4 x double> <double -2.0, double -1.0, double 0.0, double 1.0>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8f32_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v8f32_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8f32_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8f32_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  store <8 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v4i64_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v4i64_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v4i64_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v4i64_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  store <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8i32_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v8i32_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8i32_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8i32_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  store <8 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16i16_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v16i16_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16i16_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16i16_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  store <16 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v32i8_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v32i8_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v32i8_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v32i8_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  store <32 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+; ZMM versions.
+
+define void @test_constant_v8f64_align1(ptr %dst) nounwind {
+; CHECK-LABEL: test_constant_v8f64_align1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movabsq $-4616189618054758400, %rax # imm = 0xBFF0000000000000
+; CHECK-NEXT:    movntiq %rax, 8(%rdi)
+; CHECK-NEXT:    movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000
+; CHECK-NEXT:    movntiq %rax, (%rdi)
+; CHECK-NEXT:    movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
+; CHECK-NEXT:    movntiq %rax, 24(%rdi)
+; CHECK-NEXT:    movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
+; CHECK-NEXT:    movntiq %rax, 40(%rdi)
+; CHECK-NEXT:    movabsq $4611686018427387904, %rax # imm = 0x4000000000000000
+; CHECK-NEXT:    movntiq %rax, 32(%rdi)
+; CHECK-NEXT:    movabsq $4617315517961601024, %rax # imm = 0x4014000000000000
+; CHECK-NEXT:    movntiq %rax, 56(%rdi)
+; CHECK-NEXT:    movabsq $4616189618054758400, %rax # imm = 0x4010000000000000
+; CHECK-NEXT:    movntiq %rax, 48(%rdi)
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    movntiq %rax, 16(%rdi)
+; CHECK-NEXT:    retq
+  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16f32_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v16f32_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
+; SSE2-NEXT:    movntiq %rax, 40(%rdi)
+; SSE2-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
+; SSE2-NEXT:    movntiq %rax, 32(%rdi)
+; SSE2-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
+; SSE2-NEXT:    movntiq %rax, 56(%rdi)
+; SSE2-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
+; SSE2-NEXT:    movntiq %rax, 48(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v16f32_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v16f32_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
+; SSE41-NEXT:    movntiq %rax, 40(%rdi)
+; SSE41-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
+; SSE41-NEXT:    movntiq %rax, 32(%rdi)
+; SSE41-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
+; SSE41-NEXT:    movntiq %rax, 56(%rdi)
+; SSE41-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
+; SSE41-NEXT:    movntiq %rax, 48(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16f32_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
+; AVX-NEXT:    movntiq %rax, 40(%rdi)
+; AVX-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
+; AVX-NEXT:    movntiq %rax, 32(%rdi)
+; AVX-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
+; AVX-NEXT:    movntiq %rax, 56(%rdi)
+; AVX-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
+; AVX-NEXT:    movntiq %rax, 48(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16f32_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
+; AVX512-NEXT:    movntiq %rax, 40(%rdi)
+; AVX512-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
+; AVX512-NEXT:    movntiq %rax, 32(%rdi)
+; AVX512-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
+; AVX512-NEXT:    movntiq %rax, 56(%rdi)
+; AVX512-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
+; AVX512-NEXT:    movntiq %rax, 48(%rdi)
+; AVX512-NEXT:    retq
+  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8i64_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v8i64_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq $-1, %rax
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movq $-3, %rax
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movq $-2, %rax
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    movq $-5, %rax
+; SSE2-NEXT:    movntiq %rax, 40(%rdi)
+; SSE2-NEXT:    movq $-4, %rax
+; SSE2-NEXT:    movntiq %rax, 32(%rdi)
+; SSE2-NEXT:    movq $-7, %rax
+; SSE2-NEXT:    movntiq %rax, 56(%rdi)
+; SSE2-NEXT:    movq $-6, %rax
+; SSE2-NEXT:    movntiq %rax, 48(%rdi)
+; SSE2-NEXT:    xorl %eax, %eax
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v8i64_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    xorl %eax, %eax
+; SSE4A-NEXT:    movntiq %rax, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v8i64_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movq $-1, %rax
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movq $-3, %rax
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movq $-2, %rax
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    movq $-5, %rax
+; SSE41-NEXT:    movntiq %rax, 40(%rdi)
+; SSE41-NEXT:    movq $-4, %rax
+; SSE41-NEXT:    movntiq %rax, 32(%rdi)
+; SSE41-NEXT:    movq $-7, %rax
+; SSE41-NEXT:    movntiq %rax, 56(%rdi)
+; SSE41-NEXT:    movq $-6, %rax
+; SSE41-NEXT:    movntiq %rax, 48(%rdi)
+; SSE41-NEXT:    xorl %eax, %eax
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8i64_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq $-1, %rax
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movq $-3, %rax
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movq $-2, %rax
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    movq $-5, %rax
+; AVX-NEXT:    movntiq %rax, 40(%rdi)
+; AVX-NEXT:    movq $-4, %rax
+; AVX-NEXT:    movntiq %rax, 32(%rdi)
+; AVX-NEXT:    movq $-7, %rax
+; AVX-NEXT:    movntiq %rax, 56(%rdi)
+; AVX-NEXT:    movq $-6, %rax
+; AVX-NEXT:    movntiq %rax, 48(%rdi)
+; AVX-NEXT:    xorl %eax, %eax
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8i64_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movq $-1, %rax
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movq $-3, %rax
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movq $-2, %rax
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    movq $-5, %rax
+; AVX512-NEXT:    movntiq %rax, 40(%rdi)
+; AVX512-NEXT:    movq $-4, %rax
+; AVX512-NEXT:    movntiq %rax, 32(%rdi)
+; AVX512-NEXT:    movq $-7, %rax
+; AVX512-NEXT:    movntiq %rax, 56(%rdi)
+; AVX512-NEXT:    movq $-6, %rax
+; AVX512-NEXT:    movntiq %rax, 48(%rdi)
+; AVX512-NEXT:    xorl %eax, %eax
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    retq
+  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16i32_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v16i32_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
+; SSE2-NEXT:    movntiq %rax, 40(%rdi)
+; SSE2-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
+; SSE2-NEXT:    movntiq %rax, 32(%rdi)
+; SSE2-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
+; SSE2-NEXT:    movntiq %rax, 56(%rdi)
+; SSE2-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
+; SSE2-NEXT:    movntiq %rax, 48(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v16i32_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v16i32_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
+; SSE41-NEXT:    movntiq %rax, 40(%rdi)
+; SSE41-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
+; SSE41-NEXT:    movntiq %rax, 32(%rdi)
+; SSE41-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
+; SSE41-NEXT:    movntiq %rax, 56(%rdi)
+; SSE41-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
+; SSE41-NEXT:    movntiq %rax, 48(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16i32_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
+; AVX-NEXT:    movntiq %rax, 40(%rdi)
+; AVX-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
+; AVX-NEXT:    movntiq %rax, 32(%rdi)
+; AVX-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
+; AVX-NEXT:    movntiq %rax, 56(%rdi)
+; AVX-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
+; AVX-NEXT:    movntiq %rax, 48(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16i32_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
+; AVX512-NEXT:    movntiq %rax, 40(%rdi)
+; AVX512-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
+; AVX512-NEXT:    movntiq %rax, 32(%rdi)
+; AVX512-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
+; AVX512-NEXT:    movntiq %rax, 56(%rdi)
+; AVX512-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
+; AVX512-NEXT:    movntiq %rax, 48(%rdi)
+; AVX512-NEXT:    retq
+  store <16 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7, i32 -8, i32 -9, i32 -10, i32 -11, i32 -12, i32 -13, i32 -14, i32 -15>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v32i16_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v32i16_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
+; SSE2-NEXT:    movntiq %rax, 40(%rdi)
+; SSE2-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
+; SSE2-NEXT:    movntiq %rax, 32(%rdi)
+; SSE2-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
+; SSE2-NEXT:    movntiq %rax, 56(%rdi)
+; SSE2-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
+; SSE2-NEXT:    movntiq %rax, 48(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v32i16_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v32i16_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
+; SSE41-NEXT:    movntiq %rax, 40(%rdi)
+; SSE41-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
+; SSE41-NEXT:    movntiq %rax, 32(%rdi)
+; SSE41-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
+; SSE41-NEXT:    movntiq %rax, 56(%rdi)
+; SSE41-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
+; SSE41-NEXT:    movntiq %rax, 48(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v32i16_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
+; AVX-NEXT:    movntiq %rax, 40(%rdi)
+; AVX-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
+; AVX-NEXT:    movntiq %rax, 32(%rdi)
+; AVX-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
+; AVX-NEXT:    movntiq %rax, 56(%rdi)
+; AVX-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
+; AVX-NEXT:    movntiq %rax, 48(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v32i16_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
+; AVX512-NEXT:    movntiq %rax, 40(%rdi)
+; AVX512-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
+; AVX512-NEXT:    movntiq %rax, 32(%rdi)
+; AVX512-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
+; AVX512-NEXT:    movntiq %rax, 56(%rdi)
+; AVX512-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
+; AVX512-NEXT:    movntiq %rax, 48(%rdi)
+; AVX512-NEXT:    retq
+  store <32 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -17, i16 -18, i16 -19, i16 -20, i16 -21, i16 -22, i16 -23, i16 -24, i16 -25, i16 -26, i16 -27, i16 -28, i16 -29, i16 -30, i16 -31>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v64i8_align1(ptr %dst) nounwind {
+; SSE2-LABEL: test_constant_v64i8_align1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; SSE2-NEXT:    movntiq %rax, 8(%rdi)
+; SSE2-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; SSE2-NEXT:    movntiq %rax, (%rdi)
+; SSE2-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; SSE2-NEXT:    movntiq %rax, 24(%rdi)
+; SSE2-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; SSE2-NEXT:    movntiq %rax, 16(%rdi)
+; SSE2-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
+; SSE2-NEXT:    movntiq %rax, 40(%rdi)
+; SSE2-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
+; SSE2-NEXT:    movntiq %rax, 32(%rdi)
+; SSE2-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
+; SSE2-NEXT:    movntiq %rax, 56(%rdi)
+; SSE2-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
+; SSE2-NEXT:    movntiq %rax, 48(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_constant_v64i8_align1:
+; SSE4A:       # %bb.0:
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
+; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_constant_v64i8_align1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; SSE41-NEXT:    movntiq %rax, 8(%rdi)
+; SSE41-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; SSE41-NEXT:    movntiq %rax, (%rdi)
+; SSE41-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; SSE41-NEXT:    movntiq %rax, 24(%rdi)
+; SSE41-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; SSE41-NEXT:    movntiq %rax, 16(%rdi)
+; SSE41-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
+; SSE41-NEXT:    movntiq %rax, 40(%rdi)
+; SSE41-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
+; SSE41-NEXT:    movntiq %rax, 32(%rdi)
+; SSE41-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
+; SSE41-NEXT:    movntiq %rax, 56(%rdi)
+; SSE41-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
+; SSE41-NEXT:    movntiq %rax, 48(%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v64i8_align1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; AVX-NEXT:    movntiq %rax, 8(%rdi)
+; AVX-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; AVX-NEXT:    movntiq %rax, (%rdi)
+; AVX-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; AVX-NEXT:    movntiq %rax, 24(%rdi)
+; AVX-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; AVX-NEXT:    movntiq %rax, 16(%rdi)
+; AVX-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
+; AVX-NEXT:    movntiq %rax, 40(%rdi)
+; AVX-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
+; AVX-NEXT:    movntiq %rax, 32(%rdi)
+; AVX-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
+; AVX-NEXT:    movntiq %rax, 56(%rdi)
+; AVX-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
+; AVX-NEXT:    movntiq %rax, 48(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v64i8_align1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
+; AVX512-NEXT:    movntiq %rax, 8(%rdi)
+; AVX512-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
+; AVX512-NEXT:    movntiq %rax, (%rdi)
+; AVX512-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
+; AVX512-NEXT:    movntiq %rax, 24(%rdi)
+; AVX512-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
+; AVX512-NEXT:    movntiq %rax, 16(%rdi)
+; AVX512-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
+; AVX512-NEXT:    movntiq %rax, 40(%rdi)
+; AVX512-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
+; AVX512-NEXT:    movntiq %rax, 32(%rdi)
+; AVX512-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
+; AVX512-NEXT:    movntiq %rax, 56(%rdi)
+; AVX512-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
+; AVX512-NEXT:    movntiq %rax, 48(%rdi)
+; AVX512-NEXT:    retq
+  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 1, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8f64_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v8f64_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8f64_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8f64_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    retq
+  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16f32_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v16f32_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16f32_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
+; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
+; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16f32_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
+; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
+; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX512-NEXT:    retq
+  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8i64_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v8i64_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8i64_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
+; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
+; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8i64_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
+; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
+; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX512-NEXT:    retq
+  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16i32_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v16i32_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16i32_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
+; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
+; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16i32_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
+; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
+; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX512-NEXT:    retq
+  store <16 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7, i32 -8, i32 -9, i32 -10, i32 -11, i32 -12, i32 -13, i32 -14, i32 -15>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v32i16_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v32i16_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v32i16_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
+; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
+; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v32i16_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
+; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
+; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX512-NEXT:    retq
+  store <32 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -17, i16 -18, i16 -19, i16 -20, i16 -21, i16 -22, i16 -23, i16 -24, i16 -25, i16 -26, i16 -27, i16 -28, i16 -29, i16 -30, i16 -31>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v64i8_align16(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v64i8_align16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v64i8_align16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; AVX-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
+; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
+; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v64i8_align16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
+; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
+; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
+; AVX512-NEXT:    retq
+  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8f64_align32(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v8f64_align32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8f64_align32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2.0E+0,3.0E+0,4.0E+0,5.0E+0]
+; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [-2.0E+0,-1.0E+0,0.0E+0,1.0E+0]
+; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8f64_align32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [2.0E+0,3.0E+0,4.0E+0,5.0E+0]
+; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [-2.0E+0,-1.0E+0,0.0E+0,1.0E+0]
+; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 32, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16f32_align32(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v16f32_align32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16f32_align32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1,-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
+; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0,-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16f32_align32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1,-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
+; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0,-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
+; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 32, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v8i64_align32(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v8i64_align32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v8i64_align32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551612,18446744073709551611,18446744073709551610,18446744073709551609]
+; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551614,18446744073709551613]
+; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v8i64_align32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551612,18446744073709551611,18446744073709551610,18446744073709551609]
+; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551614,18446744073709551613]
+; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 32, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v16i32_align32(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v16i32_align32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v16i32_align32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967288,4294967287,4294967286,4294967285,4294967284,4294967283,4294967282,4294967281]
+; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,4294967294,4294967293,4294967292,4294967291,4294967290,4294967289]
+; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v16i32_align32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967288,4294967287,4294967286,4294967285,4294967284,4294967283,4294967282,4294967281]
+; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,4294967294,4294967293,4294967292,4294967291,4294967290,4294967289]
+; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  store <16 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7, i32 -8, i32 -9, i32 -10, i32 -11, i32 -12, i32 -13, i32 -14, i32 -15>, ptr %dst, align 32, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v32i16_align32(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v32i16_align32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v32i16_align32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65520,65519,65518,65517,65516,65515,65514,65513,65512,65511,65510,65509,65508,65507,65506,65505]
+; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
+; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v32i16_align32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [65520,65519,65518,65517,65516,65515,65514,65513,65512,65511,65510,65509,65508,65507,65506,65505]
+; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
+; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  store <32 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -17, i16 -18, i16 -19, i16 -20, i16 -21, i16 -22, i16 -23, i16 -24, i16 -25, i16 -26, i16 -27, i16 -28, i16 -29, i16 -30, i16 -31>, ptr %dst, align 32, !nontemporal !1
+  ret void
+}
+
+define void @test_constant_v64i8_align32(ptr %dst) nounwind {
+; SSE-LABEL: test_constant_v64i8_align32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
+; SSE-NEXT:    movntps %xmm0, 48(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
+; SSE-NEXT:    movntps %xmm0, 32(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; SSE-NEXT:    movntps %xmm0, 16(%rdi)
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
+; SSE-NEXT:    movntps %xmm0, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_constant_v64i8_align32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209,208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
+; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241,240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; AVX-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_constant_v64i8_align32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209,208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
+; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
+; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241,240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
+; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 32, !nontemporal !1
+  ret void
+}
+
+!1 = !{i32 1}


        


More information about the llvm-commits mailing list