[llvm] [DAG] visitEXTRACT_VECTOR_ELT - constant fold legal fp imm values (PR #74304)

Tue Dec 5 04:25:47 PST 2023

================
@@ -508,347 +327,86 @@ define void @test_zero_v8f64_align1(ptr %dst) nounwind {
 }
 
 define void @test_zero_v16f32_align1(ptr %dst) nounwind {
-; SSE2-LABEL: test_zero_v16f32_align1:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    xorl %eax, %eax
-; SSE2-NEXT:    movntiq %rax, 8(%rdi)
-; SSE2-NEXT:    movntiq %rax, (%rdi)
-; SSE2-NEXT:    movntiq %rax, 24(%rdi)
-; SSE2-NEXT:    movntiq %rax, 16(%rdi)
-; SSE2-NEXT:    movntiq %rax, 40(%rdi)
-; SSE2-NEXT:    movntiq %rax, 32(%rdi)
-; SSE2-NEXT:    movntiq %rax, 56(%rdi)
-; SSE2-NEXT:    movntiq %rax, 48(%rdi)
-; SSE2-NEXT:    retq
-;
-; SSE4A-LABEL: test_zero_v16f32_align1:
-; SSE4A:       # %bb.0:
-; SSE4A-NEXT:    xorl %eax, %eax
-; SSE4A-NEXT:    movntiq %rax, 8(%rdi)
-; SSE4A-NEXT:    movntiq %rax, 24(%rdi)
-; SSE4A-NEXT:    movntiq %rax, 40(%rdi)
-; SSE4A-NEXT:    movntiq %rax, 56(%rdi)
-; SSE4A-NEXT:    xorps %xmm0, %xmm0
-; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
-; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT:    retq
-;
-; SSE41-LABEL: test_zero_v16f32_align1:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    xorl %eax, %eax
-; SSE41-NEXT:    movntiq %rax, 8(%rdi)
-; SSE41-NEXT:    movntiq %rax, (%rdi)
-; SSE41-NEXT:    movntiq %rax, 24(%rdi)
-; SSE41-NEXT:    movntiq %rax, 16(%rdi)
-; SSE41-NEXT:    movntiq %rax, 40(%rdi)
-; SSE41-NEXT:    movntiq %rax, 32(%rdi)
-; SSE41-NEXT:    movntiq %rax, 56(%rdi)
-; SSE41-NEXT:    movntiq %rax, 48(%rdi)
-; SSE41-NEXT:    retq
-;
-; AVX-LABEL: test_zero_v16f32_align1:
-; AVX:       # %bb.0:
-; AVX-NEXT:    xorl %eax, %eax
-; AVX-NEXT:    movntiq %rax, 8(%rdi)
-; AVX-NEXT:    movntiq %rax, (%rdi)
-; AVX-NEXT:    movntiq %rax, 24(%rdi)
-; AVX-NEXT:    movntiq %rax, 16(%rdi)
-; AVX-NEXT:    movntiq %rax, 40(%rdi)
-; AVX-NEXT:    movntiq %rax, 32(%rdi)
-; AVX-NEXT:    movntiq %rax, 56(%rdi)
-; AVX-NEXT:    movntiq %rax, 48(%rdi)
-; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_zero_v16f32_align1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    movntiq %rax, 8(%rdi)
-; AVX512-NEXT:    movntiq %rax, (%rdi)
-; AVX512-NEXT:    movntiq %rax, 24(%rdi)
-; AVX512-NEXT:    movntiq %rax, 16(%rdi)
-; AVX512-NEXT:    movntiq %rax, 40(%rdi)
-; AVX512-NEXT:    movntiq %rax, 32(%rdi)
-; AVX512-NEXT:    movntiq %rax, 56(%rdi)
-; AVX512-NEXT:    movntiq %rax, 48(%rdi)
-; AVX512-NEXT:    retq
+; CHECK-LABEL: test_zero_v16f32_align1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    movntiq %rax, 8(%rdi)
+; CHECK-NEXT:    movntiq %rax, (%rdi)
+; CHECK-NEXT:    movntiq %rax, 24(%rdi)
+; CHECK-NEXT:    movntiq %rax, 16(%rdi)
+; CHECK-NEXT:    movntiq %rax, 40(%rdi)
+; CHECK-NEXT:    movntiq %rax, 32(%rdi)
+; CHECK-NEXT:    movntiq %rax, 56(%rdi)
+; CHECK-NEXT:    movntiq %rax, 48(%rdi)
----------------
phoebewang wrote:

Oh, I see. I though they have 16B offset so should write 16B in a whole.

https://github.com/llvm/llvm-project/pull/74304