[llvm] 757c7c2 - [X86][SSE] Add SSE2 extract-concat tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 23 11:42:26 PDT 2020
Author: Simon Pilgrim
Date: 2020-04-23T19:40:34+01:00
New Revision: 757c7c244b70cb35269dfe95aa801fd0ea3c5a0c
URL: https://github.com/llvm/llvm-project/commit/757c7c244b70cb35269dfe95aa801fd0ea3c5a0c
DIFF: https://github.com/llvm/llvm-project/commit/757c7c244b70cb35269dfe95aa801fd0ea3c5a0c.diff
LOG: [X86][SSE] Add SSE2 extract-concat tests
Check pre-SSE41 codegen, where fewer PEXTR*/PINSR* instructions are available
Added:
Modified:
llvm/test/CodeGen/X86/extract-concat.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/extract-concat.ll b/llvm/test/CodeGen/X86/extract-concat.ll
index b860b7281ee6..085560c1a504 100644
--- a/llvm/test/CodeGen/X86/extract-concat.ll
+++ b/llvm/test/CodeGen/X86/extract-concat.ll
@@ -1,10 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
define void @foo(<4 x float> %in, <4 x i8>* %out) {
+; SSE2-LABEL: foo:
+; SSE2: # %bb.0:
+; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
+; SSE2-NEXT: shll $8, %ecx
+; SSE2-NEXT: orl %eax, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl $65280, %eax # imm = 0xFF00
+; SSE2-NEXT: orl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: pinsrw $1, %eax, %xmm0
+; SSE2-NEXT: movd %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
; SSE42-LABEL: foo:
; SSE42: # %bb.0:
; SSE42-NEXT: cvttps2dq %xmm0, %xmm0
@@ -39,22 +55,22 @@ define void @foo(<4 x float> %in, <4 x i8>* %out) {
}
define <16 x i64> @catcat(<4 x i64> %x) {
-; SSE42-LABEL: catcat:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movq %rdi, %rax
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; SSE42-NEXT: movdqa %xmm1, 112(%rdi)
-; SSE42-NEXT: movdqa %xmm1, 96(%rdi)
-; SSE42-NEXT: movdqa %xmm3, 80(%rdi)
-; SSE42-NEXT: movdqa %xmm3, 64(%rdi)
-; SSE42-NEXT: movdqa %xmm0, 48(%rdi)
-; SSE42-NEXT: movdqa %xmm0, 32(%rdi)
-; SSE42-NEXT: movdqa %xmm2, 16(%rdi)
-; SSE42-NEXT: movdqa %xmm2, (%rdi)
-; SSE42-NEXT: retq
+; SSE-LABEL: catcat:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; SSE-NEXT: movdqa %xmm1, 112(%rdi)
+; SSE-NEXT: movdqa %xmm1, 96(%rdi)
+; SSE-NEXT: movdqa %xmm3, 80(%rdi)
+; SSE-NEXT: movdqa %xmm3, 64(%rdi)
+; SSE-NEXT: movdqa %xmm0, 48(%rdi)
+; SSE-NEXT: movdqa %xmm0, 32(%rdi)
+; SSE-NEXT: movdqa %xmm2, 16(%rdi)
+; SSE-NEXT: movdqa %xmm2, (%rdi)
+; SSE-NEXT: retq
;
; AVX1-LABEL: catcat:
; AVX1: # %bb.0:
@@ -93,24 +109,24 @@ define <16 x i64> @catcat(<4 x i64> %x) {
}
define <16 x i64> @load_catcat(<4 x i64>* %p) {
-; SSE42-LABEL: load_catcat:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movq %rdi, %rax
-; SSE42-NEXT: movdqa (%rsi), %xmm0
-; SSE42-NEXT: movdqa 16(%rsi), %xmm1
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; SSE42-NEXT: movdqa %xmm1, 112(%rdi)
-; SSE42-NEXT: movdqa %xmm1, 96(%rdi)
-; SSE42-NEXT: movdqa %xmm3, 80(%rdi)
-; SSE42-NEXT: movdqa %xmm3, 64(%rdi)
-; SSE42-NEXT: movdqa %xmm0, 48(%rdi)
-; SSE42-NEXT: movdqa %xmm0, 32(%rdi)
-; SSE42-NEXT: movdqa %xmm2, 16(%rdi)
-; SSE42-NEXT: movdqa %xmm2, (%rdi)
-; SSE42-NEXT: retq
+; SSE-LABEL: load_catcat:
+; SSE: # %bb.0:
+; SSE-NEXT: movq %rdi, %rax
+; SSE-NEXT: movdqa (%rsi), %xmm0
+; SSE-NEXT: movdqa 16(%rsi), %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; SSE-NEXT: movdqa %xmm1, 112(%rdi)
+; SSE-NEXT: movdqa %xmm1, 96(%rdi)
+; SSE-NEXT: movdqa %xmm3, 80(%rdi)
+; SSE-NEXT: movdqa %xmm3, 64(%rdi)
+; SSE-NEXT: movdqa %xmm0, 48(%rdi)
+; SSE-NEXT: movdqa %xmm0, 32(%rdi)
+; SSE-NEXT: movdqa %xmm2, 16(%rdi)
+; SSE-NEXT: movdqa %xmm2, (%rdi)
+; SSE-NEXT: retq
;
; AVX1-LABEL: load_catcat:
; AVX1: # %bb.0:
@@ -147,11 +163,11 @@ define <16 x i64> @load_catcat(<4 x i64>* %p) {
; the source ops are not an even multiple size of the result.
define <4 x i32> @cat_ext_straddle(<6 x i32>* %px, <6 x i32>* %py) {
-; SSE42-LABEL: cat_ext_straddle:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movaps 16(%rdi), %xmm0
-; SSE42-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
-; SSE42-NEXT: retq
+; SSE-LABEL: cat_ext_straddle:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps 16(%rdi), %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: retq
;
; AVX-LABEL: cat_ext_straddle:
; AVX: # %bb.0:
More information about the llvm-commits mailing list