[llvm] r312460 - [X86] Add a combine to turn (insert_subvector zero, (insert_subvector zero, X, Idx), Idx) into an insert of X into the larger zero vector.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 3 15:25:52 PDT 2017
Author: ctopper
Date: Sun Sep 3 15:25:52 2017
New Revision: 312460
URL: http://llvm.org/viewvc/llvm-project?rev=312460&view=rev
Log:
[X86] Add a combine to turn (insert_subvector zero, (insert_subvector zero, X, Idx), Idx) into an insert of X into the larger zero vector.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=312460&r1=312459&r2=312460&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 3 15:25:52 2017
@@ -35656,10 +35656,21 @@ static SDValue combineInsertSubvector(SD
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT SubVecVT = SubVec.getSimpleValueType();
- // Inserting zeros into zeros is a nop.
- if (ISD::isBuildVectorAllZeros(Vec.getNode()) &&
- ISD::isBuildVectorAllZeros(SubVec.getNode()))
- return Vec;
+ if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
+ // Inserting zeros into zeros is a nop.
+ if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+ return Vec;
+
+ // If we're inserting into a zero vector and then into a larger zero vector,
+ // just insert into the larger zero vector directly.
+ if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
+ unsigned Idx2Val = cast<ConstantSDNode>(Idx)->getZExtValue();
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec,
+ SubVec.getOperand(1),
+ DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
+ }
+ }
// If this is an insert of an extract, combine to a shuffle. Don't do this
// if the insert or extract can be represented with a subregister operation.
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=312460&r1=312459&r2=312460&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Sun Sep 3 15:25:52 2017
@@ -1134,13 +1134,11 @@ define <8 x double> @test_mm512_zextpd12
; X32-LABEL: test_mm512_zextpd128_pd512:
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
-; X32-NEXT: vmovaps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextpd128_pd512:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
-; X64-NEXT: vmovaps %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
ret <8 x double> %res
@@ -1196,13 +1194,11 @@ define <8 x i64> @test_mm512_zextsi128_s
; X32-LABEL: test_mm512_zextsi128_si512:
; X32: # BB#0:
; X32-NEXT: vmovaps %xmm0, %xmm0
-; X32-NEXT: vmovaps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextsi128_si512:
; X64: # BB#0:
; X64-NEXT: vmovaps %xmm0, %xmm0
-; X64-NEXT: vmovaps %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
ret <8 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll?rev=312460&r1=312459&r2=312460&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll Sun Sep 3 15:25:52 2017
@@ -107,14 +107,12 @@ define <8 x double> @merge_8f64_f64_12zz
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL: # BB#0:
; ALL-NEXT: vmovaps 8(%rdi), %xmm0
-; ALL-NEXT: vmovaps %ymm0, %ymm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0
-; X32-AVX512F-NEXT: vmovaps %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
%ptr1 = getelementptr inbounds double, double* %ptr, i64 2
More information about the llvm-commits mailing list