[llvm] [SelectionDAG] Convert to or mask if all insertions are -1 (PR #138213)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 1 16:53:30 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/138213
>From a4760e9bc3b4216623949f35f57ccffaee8db304 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 1 May 2025 19:04:55 -0400
Subject: [PATCH] [SelectionDAG] Convert to or mask if all insertions are -1
We already did this for insertions of 0 using an AND mask; we can do the same for insertions of -1 using an OR mask.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +-
.../AArch64/vecreduce-and-legalization.ll | 16 +-
llvm/test/CodeGen/X86/avx-cvt-3.ll | 8 +-
llvm/test/CodeGen/X86/insertelement-ones.ll | 182 ++++--------------
4 files changed, 57 insertions(+), 163 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ea1435c3934be..1645acb9d3fd0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22974,7 +22974,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
// If all insertions are zero value, try to convert to AND mask.
- // TODO: Do this for -1 with OR mask?
if (!LegalOperations && llvm::isNullConstant(InVal) &&
all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
@@ -22987,6 +22986,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
DAG.getBuildVector(VT, DL, Mask));
}
+ // If all insertions are -1, try to convert to OR mask.
+ if (!LegalOperations && llvm::isAllOnesConstant(InVal) &&
+ all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
+ count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
+ SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
+ SmallVector<SDValue, 8> Mask(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ Mask[I] = Ops[I] ? AllOnes : Zero;
+ return DAG.getNode(ISD::OR, DL, VT, CurVec,
+ DAG.getBuildVector(VT, DL, Mask));
+ }
+
// Failed to find a match in the chain - bail.
break;
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index 7fa416e0dbcd5..d2f16721e6e47 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -101,19 +101,13 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-LABEL: test_v9i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-NEXT: mov v1.b[9], w8
-; CHECK-NEXT: mov v1.b[10], w8
-; CHECK-NEXT: mov v1.b[11], w8
-; CHECK-NEXT: mov v1.b[12], w8
-; CHECK-NEXT: mov v1.b[13], w8
-; CHECK-NEXT: mov v1.b[14], w8
-; CHECK-NEXT: mov v1.b[15], w8
+; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff00
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: and x8, x9, x8, lsr #32
; CHECK-NEXT: and x8, x8, x8, lsr #16
; CHECK-NEXT: lsr x9, x8, #8
; CHECK-NEXT: and w0, w8, w9
diff --git a/llvm/test/CodeGen/X86/avx-cvt-3.ll b/llvm/test/CodeGen/X86/avx-cvt-3.ll
index 87eabd9cb5521..760db4af1f1b4 100644
--- a/llvm/test/CodeGen/X86/avx-cvt-3.ll
+++ b/llvm/test/CodeGen/X86/avx-cvt-3.ll
@@ -48,17 +48,13 @@ define <8 x float> @sitofp_shuffle_zero_v8i32(<8 x i32> %a0) {
define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_allbits_v8i32:
; X86: # %bb.0:
-; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
+; X86-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_insert_allbits_v8i32:
; X64: # %bb.0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
+; X64-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
%1 = insertelement <8 x i32> %a0, i32 -1, i32 0
diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll
index cfcb8798e0b9c..1236ff76dacd2 100644
--- a/llvm/test/CodeGen/X86/insertelement-ones.ll
+++ b/llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -150,59 +150,32 @@ define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_x12345x7:
; SSE2: # %bb.0:
-; SSE2-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
-; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE2-NEXT: movl $-1, %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
+; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v8i32_x12345x7:
; SSE3: # %bb.0:
-; SSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
-; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSE3-NEXT: movl $-1, %eax
-; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
-; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
+; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v8i32_x12345x7:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
-; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
-; SSSE3-NEXT: movl $-1, %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
+; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v8i32_x12345x7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: retq
;
-; AVX1-LABEL: insert_v8i32_x12345x7:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_v8i32_x12345x7:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: insert_v8i32_x12345x7:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
-; AVX512-NEXT: retq
+; AVX-LABEL: insert_v8i32_x12345x7:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
%1 = insertelement <8 x i32> %a, i32 -1, i32 0
%2 = insertelement <8 x i32> %1, i32 -1, i32 6
ret <8 x i32> %2
@@ -211,35 +184,27 @@ define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_x12345x7:
; SSE2: # %bb.0:
-; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSE2-NEXT: pinsrw $0, %eax, %xmm0
-; SSE2-NEXT: pinsrw $6, %eax, %xmm0
+; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v8i16_x12345x7:
; SSE3: # %bb.0:
-; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSE3-NEXT: pinsrw $0, %eax, %xmm0
-; SSE3-NEXT: pinsrw $6, %eax, %xmm0
+; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v8i16_x12345x7:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
-; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
+; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v8i16_x12345x7:
; SSE41: # %bb.0:
-; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v8i16_x12345x7:
; AVX: # %bb.0:
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
+; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = insertelement <8 x i16> %a, i16 -1, i32 0
%2 = insertelement <8 x i16> %1, i16 -1, i32 6
@@ -249,60 +214,32 @@ define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE2: # %bb.0:
-; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSE2-NEXT: pinsrw $0, %eax, %xmm0
-; SSE2-NEXT: pinsrw $6, %eax, %xmm0
-; SSE2-NEXT: pinsrw $7, %eax, %xmm1
+; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE3: # %bb.0:
-; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSE3-NEXT: pinsrw $0, %eax, %xmm0
-; SSE3-NEXT: pinsrw $6, %eax, %xmm0
-; SSE3-NEXT: pinsrw $7, %eax, %xmm1
+; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
-; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
-; SSSE3-NEXT: pinsrw $7, %eax, %xmm1
+; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE41: # %bb.0:
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: retq
;
-; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [65535,0,0,0]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: insert_v16i16_x12345x789ABCDEx:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
-; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
-; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX512-NEXT: retq
+; AVX-LABEL: insert_v16i16_x12345x789ABCDEx:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
%1 = insertelement <16 x i16> %a, i16 -1, i32 0
%2 = insertelement <16 x i16> %1, i16 -1, i32 6
%3 = insertelement <16 x i16> %2, i16 -1, i32 15
@@ -313,33 +250,26 @@ define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE2: # %bb.0:
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE3: # %bb.0:
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSSE3: # %bb.0:
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE41: # %bb.0:
-; SSE41-NEXT: movl $255, %eax
-; SSE41-NEXT: pinsrb $0, %eax, %xmm0
-; SSE41-NEXT: pinsrb $15, %eax, %xmm0
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
; AVX: # %bb.0:
-; AVX-NEXT: movl $255, %eax
-; AVX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
-; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = insertelement <16 x i8> %a, i8 -1, i32 0
%2 = insertelement <16 x i8> %1, i8 -1, i32 15
@@ -350,69 +280,31 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE2: # %bb.0:
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
-; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: orps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE3: # %bb.0:
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
-; SSE3-NEXT: orps %xmm2, %xmm0
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: orps %xmm2, %xmm1
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSSE3: # %bb.0:
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
-; SSSE3-NEXT: orps %xmm2, %xmm0
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT: orps %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE41: # %bb.0:
-; SSE41-NEXT: movl $255, %eax
-; SSE41-NEXT: pinsrb $0, %eax, %xmm0
-; SSE41-NEXT: pinsrb $15, %eax, %xmm0
-; SSE41-NEXT: pinsrb $14, %eax, %xmm1
-; SSE41-NEXT: pinsrb $15, %eax, %xmm1
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: retq
;
-; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [255,0,0,0]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl $255, %eax
-; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
-; AVX512: # %bb.0:
-; AVX512-NEXT: movl $255, %eax
-; AVX512-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
-; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
%1 = insertelement <32 x i8> %a, i8 -1, i32 0
%2 = insertelement <32 x i8> %1, i8 -1, i32 15
%3 = insertelement <32 x i8> %2, i8 -1, i32 30
More information about the llvm-commits
mailing list