[llvm] r324662 - [X86] Improve combineCastedMaskArithmetic to fold (bitcast (vXi1 (and/or/xor X, C)))->(vXi1 (and/or/xor (bitcast X), (bitcast C))) where C is a constant build_vector.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 8 14:26:39 PST 2018
Author: ctopper
Date: Thu Feb 8 14:26:39 2018
New Revision: 324662
URL: http://llvm.org/viewvc/llvm-project?rev=324662&view=rev
Log:
[X86] Improve combineCastedMaskArithmetic to fold (bitcast (vXi1 (and/or/xor X, C)))->(vXi1 (and/or/xor (bitcast X), (bitcast C))) where C is a constant build_vector.
Most vXi1 constant build vectors have to be implemented in the scalar domain anyway, so we'll probably end up with a cast there later. But by then it's too late to do the combine to get rid of it.
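As a rough standalone illustration (the function name and types here are hypothetical, not LLVM code), the conversion that makes this fold cheap is plain lane-to-bit packing:

#include <cstdint>
#include <vector>

// Pack a vXi1 constant into the same-width scalar integer: lane i of the
// vector becomes bit i of the mask.
uint64_t packMaskConstant(const std::vector<bool> &Lanes) {
  uint64_t Mask = 0;
  for (size_t Idx = 0; Idx < Lanes.size(); ++Idx)
    if (Lanes[Idx])
      Mask |= uint64_t{1} << Idx;
  return Mask; // e.g. <1,0,1,0,1,0,1,0> packs to 0x55
}

The new combinevXi1ConstantToInteger helper in the patch below does the same thing with APInt, so it works for any element count.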
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324662&r1=324661&r2=324662&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb 8 14:26:39 2018
@@ -30494,6 +30494,24 @@ static SDValue combineBitcastvxi1(Select
return DAG.getZExtOrTrunc(V, DL, VT);
}
+// Convert a vXi1 constant build vector to the same width scalar integer.
+static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) {
+ EVT SrcVT = Op.getValueType();
+ assert(SrcVT.getVectorElementType() == MVT::i1 &&
+ "Expected a vXi1 vector");
+ assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
+ "Expected a constant build vector");
+
+ APInt Imm(SrcVT.getVectorNumElements(), 0);
+ for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) {
+ SDValue In = Op.getOperand(Idx);
+ if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1))
+ Imm.setBit(Idx);
+ }
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth());
+ return DAG.getConstant(Imm, SDLoc(Op), IntVT);
+}
+
static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -30539,6 +30557,14 @@ static SDValue combineCastedMaskArithmet
return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
DAG.getBitcast(DstVT, LHS), RHS.getOperand(0));
+ // If the RHS is a vXi1 build vector, this is a good reason to flip too.
+ // Most of these have to move a constant from the scalar domain anyway.
+ if (ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) {
+ RHS = combinevXi1ConstantToInteger(RHS, DAG);
+ return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
+ DAG.getBitcast(DstVT, LHS), RHS);
+ }
+
return SDValue();
}
@@ -30632,13 +30658,7 @@ static SDValue combineBitcast(SDNode *N,
if (Subtarget.hasAVX512() && VT.isScalarInteger() &&
SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- APInt Imm(SrcVT.getVectorNumElements(), 0);
- for (unsigned Idx = 0, e = N0.getNumOperands(); Idx < e; ++Idx) {
- SDValue In = N0.getOperand(Idx);
- if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1))
- Imm.setBit(Idx);
- }
- return DAG.getConstant(Imm, SDLoc(N), VT);
+ return combinevXi1ConstantToInteger(N0, DAG);
}
// Try to remove bitcasts from input and output of mask arithmetic to
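The flip is safe because bitwise ops on i1 lanes commute with the pack-to-scalar bitcast. A minimal self-check of that identity (hypothetical test code, using std::bitset to stand in for a v16i1 value; not part of the commit):

#include <bitset>
#include <cassert>
#include <cstdint>

int main() {
  std::bitset<16> X(0xABCD), C(0x0008);   // two v16i1 values as bit patterns
  uint16_t Xs = uint16_t(X.to_ulong());   // bitcast X to i16
  uint16_t Cs = uint16_t(C.to_ulong());   // bitcast C to i16
  // pack(op(X, C)) == op(pack(X), pack(C)) for and/or/xor
  assert(uint16_t((X & C).to_ulong()) == uint16_t(Xs & Cs));
  assert(uint16_t((X | C).to_ulong()) == uint16_t(Xs | Cs));
  assert(uint16_t((X ^ C).to_ulong()) == uint16_t(Xs ^ Cs));
  return 0;
}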
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=324662&r1=324661&r2=324662&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Thu Feb 8 14:26:39 2018
@@ -3754,14 +3754,9 @@ declare i16 @llvm.x86.avx512.kandn.w(i16
define i16 @test_kandn(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kandn:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: kandnw %k1, %k0, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: andl %esi, %eax
-; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: orl $-9, %edi
+; CHECK-NEXT: andl %esi, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
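The simpler output falls out of scalar algebra once the masks live in GPRs: kandn(a, b) computes ~a & b, so kandn(kandn(a0, 8), a1) == (a0 | ~8) & a1, which is exactly the new orl $-9 / andl sequence (-9 is ~8 as a sign-extended immediate). A brute-force check (hypothetical test code, not part of the commit):

#include <cassert>
#include <cstdint>

// Scalar model of the k-register ANDN operation.
uint16_t kandn(uint16_t A, uint16_t B) { return uint16_t(~A & B); }

int main() {
  const uint16_t A1 = 0x1234; // arbitrary second operand
  for (uint32_t V = 0; V < 0x10000; ++V) {
    uint16_t A0 = uint16_t(V);
    assert(kandn(kandn(A0, 8), A1) ==
           uint16_t((A0 | ~uint16_t(8)) & A1));
  }
  return 0;
}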
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324662&r1=324661&r2=324662&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Feb 8 14:26:39 2018
@@ -513,11 +513,8 @@ define void @test7(<8 x i1> %mask) {
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
-; KNL-NEXT: movb $85, %al
-; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: testb %al, %al
+; KNL-NEXT: orb $85, %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
@@ -525,20 +522,16 @@ define void @test7(<8 x i1> %mask) {
; SKX: ## %bb.0: ## %allocas
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
-; SKX-NEXT: movb $85, %al
-; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: kortestb %k1, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: orb $85, %al
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test7:
; AVX512BW: ## %bb.0: ## %allocas
; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
-; AVX512BW-NEXT: movb $85, %al
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: korw %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: testb %al, %al
+; AVX512BW-NEXT: orb $85, %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -547,9 +540,8 @@ define void @test7(<8 x i1> %mask) {
; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
-; AVX512DQ-NEXT: movb $85, %al
-; AVX512DQ-NEXT: kmovw %eax, %k1
-; AVX512DQ-NEXT: kortestb %k1, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: orb $85, %al
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
allocas:
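Here the OR with the <i1 true, i1 false, ...> constant becomes a single orb $85 because the vector constant packs to 0b01010101. A quick check of that immediate (hypothetical test code, not part of the commit):

#include <cassert>

int main() {
  const bool Lanes[8] = {true, false, true, false, true, false, true, false};
  unsigned Mask = 0;
  for (unsigned I = 0; I < 8; ++I)
    if (Lanes[I]) // lane I sets bit I
      Mask |= 1u << I;
  assert(Mask == 85); // matches "orb $85, %al" in the new output
  return 0;
}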
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324662&r1=324661&r2=324662&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Thu Feb 8 14:26:39 2018
@@ -7020,18 +7020,16 @@ define void @vcmp_test7(<8 x i1> %mask)
; GENERIC: # %bb.0: # %allocas
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: movb $85, %al # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kortestb %k1, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
+; GENERIC-NEXT: orb $85, %al # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test7:
; SKX: # %bb.0: # %allocas
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: movb $85, %al # sched: [1:0.25]
-; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: kortestb %k1, %k0 # sched: [3:1.00]
+; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
+; SKX-NEXT: orb $85, %al # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
allocas:
%a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>