[PATCH] D37453: [X86] In combineLoopSADPattern, pad result with zeros and use full size add instead of using a smaller add and inserting.
Phabricator via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 27 11:38:47 PDT 2017
This revision was automatically updated to reflect the committed changes.
Closed by commit rL314331: [X86] In combineLoopSADPattern, pad result with zeros and use full size add instead of using a smaller add and inserting. (authored by ctopper).
Changed prior to commit:
https://reviews.llvm.org/D37453?vs=113795&id=116852#toc
Repository:
rL LLVM
https://reviews.llvm.org/D37453
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/sad.ll
Index: llvm/trunk/test/CodeGen/X86/sad.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sad.ll
+++ llvm/trunk/test/CodeGen/X86/sad.ll
@@ -43,8 +43,8 @@
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
; AVX2-NEXT: vmovdqu a+1024(%rax), %xmm2
; AVX2-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2
-; AVX2-NEXT: vpaddd %xmm1, %xmm2, %xmm2
-; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: vmovdqa %xmm2, %xmm2
+; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: addq $4, %rax
; AVX2-NEXT: jne .LBB0_1
; AVX2-NEXT: # BB#2: # %middle.block
@@ -67,8 +67,8 @@
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512F-NEXT: vmovdqu a+1024(%rax), %xmm1
; AVX512F-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
-; AVX512F-NEXT: vpaddd %xmm0, %xmm1, %xmm1
-; AVX512F-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa %xmm1, %xmm1
+; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB0_1
; AVX512F-NEXT: # BB#2: # %middle.block
@@ -93,8 +93,8 @@
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512BW-NEXT: vmovdqu a+1024(%rax), %xmm1
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
-; AVX512BW-NEXT: vpaddd %xmm0, %xmm1, %xmm1
-; AVX512BW-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa %xmm1, %xmm1
+; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB0_1
; AVX512BW-NEXT: # BB#2: # %middle.block
@@ -315,8 +315,8 @@
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512F-NEXT: vmovdqa a+1024(%rax), %ymm2
; AVX512F-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
-; AVX512F-NEXT: vpaddd %ymm1, %ymm2, %ymm2
-; AVX512F-NEXT: vinserti64x4 $0, %ymm2, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa %ymm2, %ymm2
+; AVX512F-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB1_1
; AVX512F-NEXT: # BB#2: # %middle.block
@@ -343,8 +343,8 @@
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512BW-NEXT: vmovdqa a+1024(%rax), %ymm2
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
-; AVX512BW-NEXT: vpaddd %ymm1, %ymm2, %ymm2
-; AVX512BW-NEXT: vinserti64x4 $0, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vmovdqa %ymm2, %ymm2
+; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB1_1
; AVX512BW-NEXT: # BB#2: # %middle.block
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -35536,16 +35536,13 @@
Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
- // Update part of elements of the reduction vector. This is done by first
- // extracting a sub-vector from it, updating this sub-vector, and inserting
- // it back.
- SDValue SubPhi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Phi,
- DAG.getIntPtrConstant(0, DL));
- SDValue Res = DAG.getNode(ISD::ADD, DL, ResVT, Sad, SubPhi);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Phi, Res,
- DAG.getIntPtrConstant(0, DL));
- } else
- return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
+ // Fill the upper elements with zero to match the add width.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
}
/// Convert vector increment or decrement to sub/add with an all-ones constant:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37453.116852.patch
Type: text/x-patch
Size: 3879 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170927/82382935/attachment.bin>
More information about the llvm-commits mailing list