[llvm] r350235 - [DAGCombiner][X86][PowerPC] Teach visitSIGN_EXTEND_INREG to fold (sext_in_reg (aext/sext x)) -> (sext x) when x has more than 1 sign bit and the sext_inreg is from one of them.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 2 09:58:28 PST 2019
Author: ctopper
Date: Wed Jan 2 09:58:27 2019
New Revision: 350235
URL: http://llvm.org/viewvc/llvm-project?rev=350235&view=rev
Log:
[DAGCombiner][X86][PowerPC] Teach visitSIGN_EXTEND_INREG to fold (sext_in_reg (aext/sext x)) -> (sext x) when x has more than 1 sign bit and the sext_inreg is from one of them.
If x has multiple sign bits, then it doesn't matter which one we extend from, so we can sext from x's msb instead.
The X86 setcc-combine.ll changes are a little weird. It appears we ended up with a (sext_inreg (aext (trunc (extractelt)))) after type legalization. The sext_inreg+aext now gets optimized by this combine to leave (sext (trunc (extractelt))). Then we visit the trunc before we visit the sext. This ends up changing the truncate into an extract_vector_elt from a bitcasted vector. I have a follow-up patch to fix this.
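For intuition, here is a minimal scalar sketch of the guard being added. This is a hypothetical illustration, not LLVM code: numSignBits is a stand-in for SelectionDAG::ComputeNumSignBits, and the widths (N00Bits = 32, EVTBits = 16) are example values.

#include <cstdint>
#include <cstdio>

// Scalar stand-in for SelectionDAG::ComputeNumSignBits: the number of
// leading bits that are copies of the sign bit, counting the sign bit
// itself.
static unsigned numSignBits(int32_t x) {
  uint32_t u = (uint32_t)x;
  uint32_t sign = u >> 31;
  unsigned n = 1;
  while (n < 32 && ((u >> (31 - n)) & 1) == sign)
    ++n;
  return n;
}

int main() {
  int32_t x = -5;          // 0xFFFFFFFB has 29 sign bits
  unsigned N00Bits = 32;   // width of x
  unsigned EVTBits = 16;   // sext_in_reg extends from bit 15
  // The fold is safe when bit EVTBits-1 is already one of x's sign bits,
  // i.e. every bit above it is a copy of the sign bit.
  bool fold = N00Bits <= EVTBits || (N00Bits - numSignBits(x)) < EVTBits;
  printf("sign bits = %u, fold = %d\n", numSignBits(x), fold); // 29, 1
  return 0;
}

With 29 sign bits, bit 15 is already a copy of the sign bit, so extending in-reg from it changes nothing and a plain sext of x gives the same value.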
Differential Revision: https://reviews.llvm.org/D56156
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll
llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-setb.ll
llvm/trunk/test/CodeGen/X86/setcc-combine.ll
llvm/trunk/test/CodeGen/X86/vsel-cmp-load.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=350235&r1=350234&r2=350235&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Jan 2 09:58:27 2019
@@ -9414,10 +9414,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND_IN
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
- // if x is small enough.
+ // if x is small enough or if we know that x has more than 1 sign bit and the
+ // sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getScalarValueSizeInBits() <= EVTBits &&
+ unsigned N00Bits = N00.getScalarValueSizeInBits();
+ if ((N00Bits <= EVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
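As a sanity check of the new case (N00 wider than the inreg type, but with enough sign bits), here is a small self-contained model; again an illustrative sketch rather than LLVM's own code. sextInReg models ISD::SIGN_EXTEND_INREG on an i32, and the i16/i8 widths are example values.

#include <cassert>
#include <cstdint>

// Sign-extend the low `fromBits` bits of v across all 32 bits, modeling
// ISD::SIGN_EXTEND_INREG on an i32.
static int32_t sextInReg(int32_t v, unsigned fromBits) {
  unsigned sh = 32 - fromBits;
  return (int32_t)((uint32_t)v << sh) >> sh;
}

int main() {
  // N00 is an i16 with at least 9 sign bits (its value fits in i8), so
  // 16 - ComputeNumSignBits(N00) < 8 and the combine may fire for a
  // sext_in_reg from i8.
  for (int v = INT8_MIN; v <= INT8_MAX; ++v) {
    int16_t x = (int16_t)v;
    int32_t anyext = (uint16_t)x;  // aext i16 -> i32 (high bits arbitrary;
                                   // zeros here, sext_in_reg ignores them)
    int32_t folded = (int32_t)x;   // the replacement: sext i16 -> i32
    assert(sextInReg(anyext, 8) == folded);
  }
  return 0;
}

Every i16 value with at least 9 sign bits round-trips identically through both forms, which is exactly the (N00Bits - ComputeNumSignBits(N00)) < EVTBits case the patch enables.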
Modified: llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll?rev=350235&r1=350234&r2=350235&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-truncateNconv.ll Wed Jan 2 09:58:27 2019
@@ -355,7 +355,7 @@ define signext i16 @qpConv2shw(fp128* no
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
-; CHECK-NEXT: extsh r3, r3
+; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
@@ -393,7 +393,7 @@ define signext i16 @qpConv2shw_03(fp128*
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
-; CHECK-NEXT: extsh r3, r3
+; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
@@ -509,7 +509,7 @@ define signext i8 @qpConv2sb(fp128* noca
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
-; CHECK-NEXT: extsb r3, r3
+; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
@@ -547,7 +547,7 @@ define signext i8 @qpConv2sb_03(fp128* n
; CHECK-NEXT: xsaddqp v2, v2, v3
; CHECK-NEXT: xscvqpswz v2, v2
; CHECK-NEXT: mfvsrwz r3, v2
-; CHECK-NEXT: extsb r3, r3
+; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
entry:
%0 = load fp128, fp128* %a, align 16
Modified: llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-setb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-setb.ll?rev=350235&r1=350234&r2=350235&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-setb.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-setb.ll Wed Jan 2 09:58:27 2019
@@ -744,14 +744,14 @@ define i64 @setb28(i64 %a, i64 %b) {
; CHECK-NOT: addic
; CHECK-NOT: subfe
; CHECK-NOT: isel
-; CHECK: extsh
+; CHECK: extsw
; CHECK: blr
; CHECK-PWR8-LABEL: setb28
; CHECK-PWR8-DAG: cmpd
; CHECK-PWR8-DAG: addic
; CHECK-PWR8-DAG: subfe
; CHECK-PWR8: isel
-; CHECK-PWR8: extsh
+; CHECK-PWR8: extsw
; CHECK-PWR8: blr
}
Modified: llvm/trunk/test/CodeGen/X86/setcc-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc-combine.ll?rev=350235&r1=350234&r2=350235&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc-combine.ll Wed Jan 2 09:58:27 2019
@@ -8,8 +8,8 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_1:
@@ -17,7 +17,8 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -31,14 +32,15 @@ define i32 @test_ne_1(<4 x i32> %A, <4 x
; SSE2-LABEL: test_ne_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm1, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ne_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: pextrb $4, %xmm1, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -67,8 +69,8 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ge_1:
@@ -76,7 +78,8 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -90,14 +93,15 @@ define i32 @test_lt_1(<4 x i32> %A, <4 x
; SSE2-LABEL: test_lt_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm1, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_lt_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: pextrb $4, %xmm1, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -126,8 +130,8 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm1, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_2:
@@ -135,7 +139,8 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: pextrb $4, %xmm1, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -149,14 +154,15 @@ define i32 @test_ne_2(<4 x i32> %A, <4 x
; SSE2-LABEL: test_ne_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ne_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -172,8 +178,8 @@ define i32 @test_le_2(<4 x i32> %A, <4 x
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm1, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_le_2:
@@ -181,7 +187,8 @@ define i32 @test_le_2(<4 x i32> %A, <4 x
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
-; SSE41-NEXT: pextrd $1, %xmm1, %eax
+; SSE41-NEXT: pextrb $4, %xmm1, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -208,14 +215,15 @@ define i32 @test_lt_2(<4 x i32> %A, <4 x
; SSE2-LABEL: test_lt_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_lt_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@@ -229,14 +237,15 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x
; SSE2-LABEL: test_gt_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_gt_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrb $4, %xmm0, %eax
+; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
Modified: llvm/trunk/test/CodeGen/X86/vsel-cmp-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vsel-cmp-load.ll?rev=350235&r1=350234&r2=350235&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vsel-cmp-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vsel-cmp-load.ll Wed Jan 2 09:58:27 2019
@@ -11,10 +11,9 @@ define <8 x i32> @eq_zero(<8 x i8>* %p,
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; AVX1-NEXT: vpslld $24, %xmm3, %xmm3
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; AVX1-NEXT: vpslld $24, %xmm2, %xmm2
+; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -115,12 +114,9 @@ define <8 x i32> @slt_zero(<8 x i8>* %p,
; AVX1-LABEL: slt_zero:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxbw (%rdi), %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; AVX1-NEXT: vpslld $24, %xmm3, %xmm3
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; AVX1-NEXT: vpslld $24, %xmm2, %xmm2
+; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -184,10 +180,9 @@ define <8 x float> @ne_zero_fp_select(<8
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; AVX1-NEXT: vpslld $24, %xmm3, %xmm3
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; AVX1-NEXT: vpslld $24, %xmm2, %xmm2
+; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq