[llvm] r374555 - [DAGCombiner] fold vselect-of-constants to shift
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 07:17:56 PDT 2019
Author: spatel
Date: Fri Oct 11 07:17:56 2019
New Revision: 374555
URL: http://llvm.org/viewvc/llvm-project?rev=374555&view=rev
Log:
[DAGCombiner] fold vselect-of-constants to shift
The test diffs suggest that we are still missing some more basic
analysis/transforms, but this change keeps the vector path in
sync with the scalar path (rL374397). As with that change, this
is a preliminary step toward introducing the reverse transform
in IR, as proposed in D63382.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll
llvm/trunk/test/CodeGen/X86/vselect.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=374555&r1=374554&r2=374555&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Oct 11 07:17:56 2019
@@ -8614,6 +8614,15 @@ SDValue DAGCombiner::foldVSelectOfConsta
return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
}
+ // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
+ APInt Pow2C;
+ if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+ isNullOrNullSplat(N2)) {
+ SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+ }
+
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
Modified: llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll?rev=374555&r1=374554&r2=374555&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll (original)
+++ llvm/trunk/test/CodeGen/X86/selectcc-to-shiftand.ll Fri Oct 11 07:17:56 2019
@@ -213,9 +213,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<
define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v8i16:
; ANY: # %bb.0:
-; ANY-NEXT: psllw $15, %xmm0
-; ANY-NEXT: psraw $15, %xmm0
; ANY-NEXT: pand {{.*}}(%rip), %xmm0
+; ANY-NEXT: psllw $7, %xmm0
; ANY-NEXT: retq
%shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
ret <8 x i16> %shl
@@ -224,9 +223,8 @@ define <8 x i16> @sel_shift_bool_v8i16(<
define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v4i32:
; ANY: # %bb.0:
-; ANY-NEXT: pslld $31, %xmm0
-; ANY-NEXT: psrad $31, %xmm0
; ANY-NEXT: pand {{.*}}(%rip), %xmm0
+; ANY-NEXT: pslld $6, %xmm0
; ANY-NEXT: retq
%shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
ret <4 x i32> %shl
@@ -235,10 +233,8 @@ define <4 x i32> @sel_shift_bool_v4i32(<
define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v2i64:
; ANY: # %bb.0:
-; ANY-NEXT: psllq $63, %xmm0
-; ANY-NEXT: psrad $31, %xmm0
-; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; ANY-NEXT: pand {{.*}}(%rip), %xmm0
+; ANY-NEXT: psllq $16, %xmm0
; ANY-NEXT: retq
%shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
ret <2 x i64> %shl
Modified: llvm/trunk/test/CodeGen/X86/vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect.ll?rev=374555&r1=374554&r2=374555&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect.ll Fri Oct 11 07:17:56 2019
@@ -647,33 +647,22 @@ define void @vselect_allzeros_LHS_multip
; This test case previously crashed after r363802, r363850, and r363856 due
; any_extend_vector_inreg not being handled by the X86 backend.
define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
-; SSE2-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: movq %xmm0, %rax
-; SSE2-NEXT: andl $32768, %eax # imm = 0x8000
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmovsxbq %xmm0, %xmm0
-; SSE41-NEXT: movq %xmm0, %rax
-; SSE41-NEXT: andl $32768, %eax # imm = 0x8000
-; SSE41-NEXT: retq
+; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
+; SSE: # %bb.0:
+; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: shlq $15, %rax
+; SSE-NEXT: retq
;
; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: andl $32768, %eax # imm = 0x8000
+; AVX-NEXT: andl $1, %eax
+; AVX-NEXT: shlq $15, %rax
; AVX-NEXT: retq
0:
%1 = load <8 x i8>, <8 x i8>* %x
More information about the llvm-commits
mailing list