[llvm] 71b823d - [X86][AVX] Add missing AVX1 PMULDQ combine tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 22 06:42:55 PDT 2021
Author: Simon Pilgrim
Date: 2021-03-22T13:41:41Z
New Revision: 71b823dd68f67d9594d83f8b33c46f7a60d1b305
URL: https://github.com/llvm/llvm-project/commit/71b823dd68f67d9594d83f8b33c46f7a60d1b305
DIFF: https://github.com/llvm/llvm-project/commit/71b823dd68f67d9594d83f8b33c46f7a60d1b305.diff
LOG: [X86][AVX] Add missing AVX1 PMULDQ combine tests
Yet another case of update_llc_test_checks.py not reporting when a RUN line doesn't have any matching check prefixes.
Added:
Modified:
llvm/test/CodeGen/X86/combine-pmuldq.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index 4545a084aaaf..7868c8b21a93 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512DQVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512DQVL
define <2 x i64> @combine_shuffle_sext_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_shuffle_sext_pmuldq:
@@ -66,6 +66,15 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
; SSE-NEXT: pmuludq %xmm3, %xmm1
; SSE-NEXT: retq
;
+; AVX1-LABEL: combine_shuffle_zero_pmuludq_256:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: combine_shuffle_zero_pmuludq_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
@@ -102,6 +111,22 @@ define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) {
; SSE-NEXT: pmuludq %xmm4, %xmm3
; SSE-NEXT: retq
;
+; AVX1-LABEL: combine_zext_pmuludq_256:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,2,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [715827883,715827883]
+; AVX1-NEXT: vpmuludq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: combine_zext_pmuludq_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -202,6 +227,32 @@ define i32 @PR43159(<4 x i32>* %a0) {
; SSE-NEXT: pextrd $3, %xmm0, %ecx
; SSE-NEXT: jmp foo # TAILCALL
;
+; AVX1-LABEL: PR43159:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1645975491,344322273,2164392969,1916962805]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrld $1, %xmm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0,1,2,3],xmm3[4,5],xmm0[6,7]
+; AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $6, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %edi
+; AVX1-NEXT: vpextrd $1, %xmm1, %esi
+; AVX1-NEXT: vpextrd $2, %xmm0, %edx
+; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT: jmp foo # TAILCALL
+;
; AVX2-LABEL: PR43159:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
@@ -317,6 +368,52 @@ define <8 x i32> @PR49658_zext(i32* %ptr, i32 %mul) {
; SSE-NEXT: # %bb.2: # %end
; SSE-NEXT: retq
;
+; AVX1-LABEL: PR49658_zext:
+; AVX1: # %bb.0: # %start
+; AVX1-NEXT: movl %esi, %eax
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: movq $-2097152, %rax # imm = 0xFFE00000
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
+; AVX1-NEXT: vpsrlq $32, %xmm9, %xmm8
+; AVX1-NEXT: .p2align 4, 0x90
+; AVX1-NEXT: .LBB7_1: # %loop
+; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = mem[0],zero,mem[1],zero
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm5 = mem[0],zero,mem[1],zero
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm6 = mem[0],zero,mem[1],zero
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm7 = mem[0],zero,mem[1],zero
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm3
+; AVX1-NEXT: vpmuludq %xmm7, %xmm8, %xmm7
+; AVX1-NEXT: vpsllq $32, %xmm7, %xmm7
+; AVX1-NEXT: vpaddq %xmm7, %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm7
+; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm2
+; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX1-NEXT: vpaddq %xmm2, %xmm7, %xmm2
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
+; AVX1-NEXT: vpmuludq %xmm5, %xmm9, %xmm3
+; AVX1-NEXT: vpmuludq %xmm5, %xmm8, %xmm5
+; AVX1-NEXT: vpsllq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpaddq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm5
+; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm6
+; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4
+; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm4[1,3],xmm3[1,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: subq $-128, %rax
+; AVX1-NEXT: jne .LBB7_1
+; AVX1-NEXT: # %bb.2: # %end
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: PR49658_zext:
; AVX2: # %bb.0: # %start
; AVX2-NEXT: movl %esi, %eax
@@ -463,6 +560,64 @@ define <8 x i32> @PR49658_sext(i32* %ptr, i32 %mul) {
; SSE-NEXT: # %bb.2: # %end
; SSE-NEXT: retq
;
+; AVX1-LABEL: PR49658_sext:
+; AVX1: # %bb.0: # %start
+; AVX1-NEXT: movslq %esi, %rax
+; AVX1-NEXT: vmovq %rax, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: movq $-2097152, %rax # imm = 0xFFE00000
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
+; AVX1-NEXT: vpsrlq $32, %xmm9, %xmm8
+; AVX1-NEXT: .p2align 4, 0x90
+; AVX1-NEXT: .LBB8_1: # %loop
+; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
+; AVX1-NEXT: vpmovsxdq 2097152(%rdi,%rax), %xmm4
+; AVX1-NEXT: vpmovsxdq 2097160(%rdi,%rax), %xmm5
+; AVX1-NEXT: vpmovsxdq 2097168(%rdi,%rax), %xmm6
+; AVX1-NEXT: vpmovsxdq 2097176(%rdi,%rax), %xmm7
+; AVX1-NEXT: vpsrlq $32, %xmm7, %xmm3
+; AVX1-NEXT: vpmuludq %xmm3, %xmm9, %xmm3
+; AVX1-NEXT: vpmuludq %xmm7, %xmm8, %xmm2
+; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsrlq $32, %xmm6, %xmm3
+; AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm7
+; AVX1-NEXT: vpmuludq %xmm6, %xmm7, %xmm7
+; AVX1-NEXT: vpaddq %xmm7, %xmm3, %xmm3
+; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm6
+; AVX1-NEXT: vpaddq %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm3[1,3],xmm2[1,3]
+; AVX1-NEXT: vpmuludq %xmm5, %xmm8, %xmm3
+; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm6
+; AVX1-NEXT: vpmuludq %xmm6, %xmm9, %xmm6
+; AVX1-NEXT: vpaddq %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
+; AVX1-NEXT: vpmuludq %xmm5, %xmm9, %xmm5
+; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm5
+; AVX1-NEXT: vpmuludq %xmm4, %xmm5, %xmm5
+; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm6
+; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm6
+; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpsllq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm4
+; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm4[1,3],xmm3[1,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpaddd %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: subq $-128, %rax
+; AVX1-NEXT: jne .LBB8_1
+; AVX1-NEXT: # %bb.2: # %end
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: PR49658_sext:
; AVX2: # %bb.0: # %start
; AVX2-NEXT: movslq %esi, %rax
More information about the llvm-commits mailing list