[llvm] 25f6464 - [X86] Fix one of the PMADDWD tests to not have dead code.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed May 13 14:05:54 PDT 2020
Author: Craig Topper
Date: 2020-05-13T14:05:07-07:00
New Revision: 25f6464b320274df9b7cabd5cf33152b56c0c9f6
URL: https://github.com/llvm/llvm-project/commit/25f6464b320274df9b7cabd5cf33152b56c0c9f6
DIFF: https://github.com/llvm/llvm-project/commit/25f6464b320274df9b7cabd5cf33152b56c0c9f6.diff
LOG: [X86] Fix one of the PMADDWD tests to not have dead code.
There are two reductions in this test. It looks like I intended
to combine them by packing one into the upper 32 bits of the
result, but the OR instruction was missing, leaving one of the
reductions as dead code.
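As a quick illustration of what the test computes, here is a rough,
hypothetical C sketch (not part of the commit; the real test is the
LLVM IR in madd.ll): two independent reductions run over the same
byte input, and their 32-bit results are packed into one 64-bit
return value with a shift and an OR. That OR is the step that was
missing from the IR, so the second reduction's value was never used.

#include <stdint.h>
#include <stddef.h>

/* Hypothetical sketch only; variable names and which result lands
 * in the upper half are illustrative, not taken from the test. */
uint64_t sum_and_sum_of_squares(const uint8_t *a, size_t n) {
  uint32_t sum = 0;    /* first reduction */
  uint32_t sumsq = 0;  /* second reduction */
  for (size_t i = 0; i < n; ++i) {
    uint32_t v = a[i];
    sum += v;          /* plain sum */
    sumsq += v * v;    /* sum of squares */
  }
  /* Pack one 32-bit result into the upper half and OR in the other;
   * this combine is what the fixed IR now performs. */
  return ((uint64_t)sumsq << 32) | (uint64_t)sum;
}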
Added:
Modified:
llvm/test/CodeGen/X86/madd.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 5846ebf42c75..5058cf3f05a6 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -2797,12 +2797,20 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
; SSE2-NEXT: addq $-8, %rax
; SSE2-NEXT: jne .LBB33_1
; SSE2-NEXT: # %bb.2: # %middle.block
+; SSE2-NEXT: paddd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; SSE2-NEXT: paddd %xmm2, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,2,3]
+; SSE2-NEXT: paddd %xmm3, %xmm2
+; SSE2-NEXT: movd %xmm2, %ecx
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: shlq $32, %rcx
+; SSE2-NEXT: orq %rcx, %rax
; SSE2-NEXT: retq
;
; AVX1-LABEL: sum_and_sum_of_squares:
@@ -2829,6 +2837,13 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
; AVX1-NEXT: addq $-8, %rax
; AVX1-NEXT: jne .LBB33_1
; AVX1-NEXT: # %bb.2: # %middle.block
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vmovd %xmm1, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -2836,6 +2851,8 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: shlq $32, %rcx
+; AVX1-NEXT: orq %rcx, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -2855,6 +2872,13 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
; AVX256-NEXT: addq $-8, %rax
; AVX256-NEXT: jne .LBB33_1
; AVX256-NEXT: # %bb.2: # %middle.block
+; AVX256-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX256-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX256-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; AVX256-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX256-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; AVX256-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX256-NEXT: vmovd %xmm1, %ecx
; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -2862,6 +2886,8 @@ define i64 @sum_and_sum_of_squares(i8* %a, i32 %n) {
; AVX256-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vmovd %xmm0, %eax
+; AVX256-NEXT: shlq $32, %rcx
+; AVX256-NEXT: orq %rcx, %rax
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
entry:
@@ -2901,5 +2927,6 @@ middle.block:
%tmp = zext i32 %8 to i64
%tmp28 = shl nuw i64 %tmp, 32
%tmp29 = zext i32 %9 to i64
- ret i64 %tmp29
+ %tmp30 = or i64 %tmp28, %tmp29
+ ret i64 %tmp30
}