[llvm] r319406 - [X86] Optimize avx2 vgatherqps for v2f32 with v2i64 index type.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 23:01:40 PST 2017
Author: ctopper
Date: Wed Nov 29 23:01:40 2017
New Revision: 319406
URL: http://llvm.org/viewvc/llvm-project?rev=319406&view=rev
Log:
[X86] Optimize avx2 vgatherqps for v2f32 with v2i64 index type.
Normal type legalization will widen everything, which requires forcing 0s into the mask register. We can instead choose the gather form that only reads 2 elements, without needing to zero the mask.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=319406&r1=319405&r2=319406&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Nov 29 23:01:40 2017
@@ -1127,6 +1127,9 @@ X86TargetLowering::X86TargetLowering(con
}
if (HasInt256) {
+ // Custom legalize 2x32 to get a little better code.
+ setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
+
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::MGATHER, VT, Custom);
@@ -1360,11 +1363,6 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::CTPOP, VT, Legal);
}
- // Custom legalize 2x32 to get a little better code.
- if (Subtarget.hasVLX()) {
- setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
- }
-
// Custom lower several nodes.
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
@@ -24863,7 +24861,7 @@ void X86TargetLowering::ReplaceNodeResul
}
case ISD::MGATHER: {
EVT VT = N->getValueType(0);
- if (VT == MVT::v2f32 && Subtarget.hasVLX()) {
+ if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) {
auto *Gather = cast<MaskedGatherSDNode>(N);
SDValue Index = Gather->getIndex();
if (Index.getValueType() != MVT::v2i64)
@@ -24873,10 +24871,17 @@ void X86TargetLowering::ReplaceNodeResul
SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
Gather->getValue(),
DAG.getUNDEF(MVT::v2f32));
+ if (!Subtarget.hasVLX()) {
+ // We need to widen the mask, but the instruction will only use 2
+ // of its elements. So we can use undef.
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask,
+ DAG.getUNDEF(MVT::v2i1));
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask);
+ }
SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(),
Index };
SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
- DAG.getVTList(MVT::v4f32, MVT::v2i1, MVT::Other), Ops, dl,
+ DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl,
Gather->getMemoryVT(), Gather->getMemOperand());
Results.push_back(Res);
Results.push_back(Res.getValue(2));
Modified: llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll?rev=319406&r1=319405&r2=319406&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll Wed Nov 29 23:01:40 2017
@@ -134,13 +134,12 @@ define <2 x float> @masked_gather_v2floa
;
; X64-LABEL: masked_gather_v2float:
; X64: # BB#0: # %entry
-; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; X64-NEXT: vmovaps (%rdi), %xmm2
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT: vpslld $31, %xmm0, %xmm0
; X64-NEXT: vpsrad $31, %xmm0, %xmm0
-; X64-NEXT: vmovaps (%rdi), %xmm2
-; X64-NEXT: vgatherqps %xmm0, (,%ymm2), %xmm1
+; X64-NEXT: vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT: vmovaps %xmm1, %xmm0
-; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
; NOGATHER-LABEL: masked_gather_v2float:
@@ -185,13 +184,12 @@ define <4 x float> @masked_gather_v2floa
;
; X64-LABEL: masked_gather_v2float_concat:
; X64: # BB#0: # %entry
-; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; X64-NEXT: vmovaps (%rdi), %xmm2
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT: vpslld $31, %xmm0, %xmm0
; X64-NEXT: vpsrad $31, %xmm0, %xmm0
-; X64-NEXT: vmovaps (%rdi), %xmm2
-; X64-NEXT: vgatherqps %xmm0, (,%ymm2), %xmm1
+; X64-NEXT: vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT: vmovaps %xmm1, %xmm0
-; X64-NEXT: vzeroupper
; X64-NEXT: retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
More information about the llvm-commits
mailing list