[llvm] r321408 - [X86] Remove type restrictions from WidenMaskArithmetic.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 23 10:53:05 PST 2017
Author: ctopper
Date: Sat Dec 23 10:53:05 2017
New Revision: 321408
URL: http://llvm.org/viewvc/llvm-project?rev=321408&view=rev
Log:
[X86] Remove type restrictions from WidenMaskArithmetic.
This can help AVX-512 code where mask types are legal, allowing us to remove extends and truncates to/from mask types.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
llvm/trunk/test/CodeGen/X86/avx512-ext.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=321408&r1=321407&r2=321408&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 23 10:53:05 2017
@@ -33012,21 +33012,20 @@ static SDValue combineANDXORWithAllOnesI
// register. In most cases we actually compare or select YMM-sized registers
// and mixing the two types creates horrible code. This method optimizes
// some of the transition sequences.
+// Even with AVX-512 this is still useful for removing casts around logical
+// operations on vXi1 mask types.
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
- if (!VT.is256BitVector())
- return SDValue();
+ assert(VT.isVector() && "Expected vector type");
assert((N->getOpcode() == ISD::ANY_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
SDValue Narrow = N->getOperand(0);
- EVT NarrowVT = Narrow->getValueType(0);
- if (!NarrowVT.is128BitVector())
- return SDValue();
+ EVT NarrowVT = Narrow.getValueType();
if (Narrow->getOpcode() != ISD::XOR &&
Narrow->getOpcode() != ISD::AND &&
@@ -35917,7 +35916,7 @@ static SDValue combineSext(SDNode *N, Se
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
- if (Subtarget.hasAVX() && VT.is256BitVector())
+ if (VT.isVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
@@ -36109,7 +36108,7 @@ static SDValue combineZext(SDNode *N, Se
if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
return V;
- if (VT.is256BitVector())
+ if (VT.isVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
Modified: llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll?rev=321408&r1=321407&r2=321408&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll Sat Dec 23 10:53:05 2017
@@ -17,78 +17,40 @@ define <16 x i1> @test1() {
}
define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
-; KNL-LABEL: test2:
-; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
-; KNL-NEXT: vpslld $31, %zmm1, %zmm1
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vpmovdb %zmm0, %xmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test2:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
-; SKX-NEXT: vpmovb2m %xmm1, %k0
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
-; SKX-NEXT: vpmovb2m %xmm0, %k1
-; SKX-NEXT: kandw %k0, %k1, %k0
-; SKX-NEXT: vpmovm2b %k0, %xmm0
-; SKX-NEXT: retq
+; ALL_X64-LABEL: test2:
+; ALL_X64: ## %bb.0:
+; ALL_X64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; ALL_X64-NEXT: vpsllw $7, %xmm0, %xmm0
+; ALL_X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; ALL_X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; ALL_X64-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test2:
; KNL_X32: ## %bb.0:
-; KNL_X32-NEXT: vpmovsxbd %xmm1, %zmm1
-; KNL_X32-NEXT: vpslld $31, %zmm1, %zmm1
-; KNL_X32-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL_X32-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL_X32-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
+; KNL_X32-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL_X32-NEXT: vpsllw $7, %xmm0, %xmm0
+; KNL_X32-NEXT: vpand LCPI1_0, %xmm0, %xmm0
+; KNL_X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL_X32-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; KNL_X32-NEXT: retl
%c = and <16 x i1>%a, %b
ret <16 x i1> %c
}
define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
-; KNL-LABEL: test3:
-; KNL: ## %bb.0:
-; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
-; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
-; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
-; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vpmovdw %zmm0, %ymm0
-; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test3:
-; SKX: ## %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
-; SKX-NEXT: vpmovw2m %xmm1, %k0
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
-; SKX-NEXT: vpmovw2m %xmm0, %k1
-; SKX-NEXT: kandb %k0, %k1, %k0
-; SKX-NEXT: vpmovm2w %k0, %xmm0
-; SKX-NEXT: retq
+; ALL_X64-LABEL: test3:
+; ALL_X64: ## %bb.0:
+; ALL_X64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; ALL_X64-NEXT: vpsllw $15, %xmm0, %xmm0
+; ALL_X64-NEXT: vpsraw $15, %xmm0, %xmm0
+; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test3:
; KNL_X32: ## %bb.0:
-; KNL_X32-NEXT: vpmovsxwq %xmm1, %zmm1
-; KNL_X32-NEXT: vpsllq $63, %zmm1, %zmm1
-; KNL_X32-NEXT: vpmovsxwq %xmm0, %zmm0
-; KNL_X32-NEXT: vpsllq $63, %zmm0, %zmm0
-; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1
-; KNL_X32-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
-; KNL_X32-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
+; KNL_X32-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL_X32-NEXT: vpsllw $15, %xmm0, %xmm0
+; KNL_X32-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <8 x i1>%a, %b
ret <8 x i1> %c
@@ -102,11 +64,9 @@ define <4 x i1> @test4(<4 x i1>%a, <4 x
;
; SKX-LABEL: test4:
; SKX: ## %bb.0:
-; SKX-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
-; SKX-NEXT: vptestmd %xmm1, %xmm1, %k0 {%k1}
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpsrad $31, %xmm0, %xmm0
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test4:
Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=321408&r1=321407&r2=321408&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Sat Dec 23 10:53:05 2017
@@ -1366,21 +1366,12 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
}
define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
-; KNL-LABEL: trunc_4i32_to_4i1:
-; KNL: # %bb.0:
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpslld $31, %xmm0, %xmm0
-; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: trunc_4i32_to_4i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0
-; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
-; SKX-NEXT: vpslld $31, %xmm1, %xmm0
-; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1}
-; SKX-NEXT: vpmovm2d %k0, %xmm0
-; SKX-NEXT: retq
+; ALL-LABEL: trunc_4i32_to_4i1:
+; ALL: # %bb.0:
+; ALL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; ALL-NEXT: vpslld $31, %xmm0, %xmm0
+; ALL-NEXT: vpsrad $31, %xmm0, %xmm0
+; ALL-NEXT: retq
%mask_a = trunc <4 x i32>%a to <4 x i1>
%mask_b = trunc <4 x i32>%b to <4 x i1>
%a_and_b = and <4 x i1>%mask_a, %mask_b
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=321408&r1=321407&r2=321408&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sat Dec 23 10:53:05 2017
@@ -4376,20 +4376,16 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
; GENERIC-LABEL: trunc_4i32_to_4i1:
; GENERIC: # %bb.0:
+; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: trunc_4i32_to_4i1:
; SKX: # %bb.0:
+; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask_a = trunc <4 x i32>%a to <4 x i1>
%mask_b = trunc <4 x i32>%b to <4 x i1>
More information about the llvm-commits
mailing list