[llvm] r368438 - [X86][SSE] Swap X86ISD::BLENDV inputs with an inverted selection mask (PR42825)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 9 05:44:20 PDT 2019
Author: rksimon
Date: Fri Aug 9 05:44:20 2019
New Revision: 368438
URL: http://llvm.org/viewvc/llvm-project?rev=368438&view=rev
Log:
[X86][SSE] Swap X86ISD::BLENDV inputs with an inverted selection mask (PR42825)
As discussed on PR42825, if we are inverting the selection mask, we can simply swap the inputs and avoid the inversion.
Differential Revision: https://reviews.llvm.org/D65522
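
For reference, the fold relies on the identity that inverting a bitwise selection mask is equivalent to swapping the two selected values. The following standalone C++ snippet (an illustration only, not part of the patch) models the per-bit blend semantics to show why the transform is sound:

    #include <cassert>
    #include <cstdint>

    // Per-bit model of a blend: a set mask bit takes the bit from TrueVal,
    // a clear mask bit takes the bit from FalseVal.
    static uint32_t blend(uint32_t Mask, uint32_t TrueVal, uint32_t FalseVal) {
      return (Mask & TrueVal) | (~Mask & FalseVal);
    }

    int main() {
      uint32_t X = 0x12345678, Y = 0x9ABCDEF0, Cond = 0x0FF00FF0;
      // Inverting the mask is the same as swapping the two inputs, so the
      // explicit NOT (the pcmpeqd + pxor pair in the old codegen) can be dropped.
      assert(blend(~Cond, X, Y) == blend(Cond, Y, X));
      return 0;
    }
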
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll
llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=368438&r1=368437&r2=368438&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 9 05:44:20 2019
@@ -36615,6 +36615,12 @@ static SDValue combineSelect(SDNode *N,
if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
return V;
+ // select(~Cond, X, Y) -> select(Cond, Y, X)
+ if (CondVT.getScalarType() != MVT::i1)
+ if (SDValue CondNot = IsNOT(Cond, DAG))
+ return DAG.getNode(N->getOpcode(), DL, VT,
+ DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);
Modified: llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll?rev=368438&r1=368437&r2=368438&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sse41-intrinsics.ll Fri Aug 9 05:44:20 2019
@@ -157,10 +157,9 @@ define <16 x i8> @xor_pblendvb(<16 x i8>
; CHECK-LABEL: xor_pblendvb:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm3
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
-; CHECK-NEXT: pblendvb %xmm0, %xmm1, %xmm3
-; CHECK-NEXT: movdqa %xmm3, %xmm0
+; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = xor <16 x i8> %a2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1)
@@ -170,11 +169,10 @@ define <16 x i8> @xor_pblendvb(<16 x i8>
define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: xor_blendvps:
; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa %xmm0, %xmm3
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
-; CHECK-NEXT: blendvps %xmm0, %xmm1, %xmm3
-; CHECK-NEXT: movaps %xmm3, %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: blendvps %xmm0, %xmm3, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <4 x float> %a2 to <4 x i32>
%2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -186,11 +184,10 @@ define <4 x float> @xor_blendvps(<4 x fl
define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: xor_blendvpd:
; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa %xmm0, %xmm3
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
-; CHECK-NEXT: blendvpd %xmm0, %xmm1, %xmm3
-; CHECK-NEXT: movapd %xmm3, %xmm0
+; CHECK-NEXT: movapd %xmm0, %xmm3
+; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <2 x double> %a2 to <4 x i32>
%2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
Modified: llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll?rev=368438&r1=368437&r2=368438&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll Fri Aug 9 05:44:20 2019
@@ -1852,25 +1852,20 @@ define <16 x i32> @test_masked_v16i32(i8
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm4
; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT: vblendvps %ymm3, %ymm4, %ymm1, %ymm1
+; AVX1-NEXT: vblendvps %ymm3, %ymm1, %ymm4, %ymm1
; AVX1-NEXT: vmovntdqa (%rdi), %xmm3
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm3, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_masked_v16i32: