[llvm-branch-commits] [llvm-branch] r286251 - [3.9.1] Merging r283070 - [X86][AVX] Ensure broadcast loads respect dependencies

Simon Pilgrim via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Nov 8 09:01:05 PST 2016


Author: rksimon
Date: Tue Nov  8 11:01:05 2016
New Revision: 286251

URL: http://llvm.org/viewvc/llvm-project?rev=286251&view=rev
Log:
[3.9.1] Merging r283070 - [X86][AVX] Ensure broadcast loads respect dependencies

To allow broadcast loads of a non-zero'th vector element, lowerVectorShuffleAsBroadcast can replace a load with a new load with an adjusted address, but unfortunately we weren't ensuring that the new load respected the same dependencies.

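This patch adds a TokenFactor that joins the output chains of the old and new loads, and updates all users of the old load's output chain to reference the TokenFactor instead, so that they depend on both loads.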

Bug found during internal testing.

Differential Revision: https://reviews.llvm.org/D25039

As discussed on PR30596

Modified:
    llvm/branches/release_39/   (props changed)
    llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll

Propchange: llvm/branches/release_39/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Nov  8 11:01:05 2016
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,277773,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,282613,283129
+/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276051,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276676,276712,276740,276823,276956,276980,277093,277114,277135,277371,277399,277500,277504,277625,277691,277693,277773,278002,278086,278133,278157,278343,278370,278413,278558-278559,278562,278569,278571,278573,278575,278584,278841,278900,278938,278999,279125,279268,279369,279647,280837,281957,282613,283070,283129

Modified: llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp?rev=286251&r1=286250&r2=286251&view=diff
==============================================================================
--- llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp Tue Nov  8 11:01:05 2016
@@ -8656,6 +8656,17 @@ static SDValue lowerVectorShuffleAsBroad
     V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                     DAG.getMachineFunction().getMachineMemOperand(
                         Ld->getMemOperand(), Offset, SVT.getStoreSize()));
+
+    // Make sure the newly-created LOAD is in the same position as Ld in
+    // terms of dependency. We create a TokenFactor for Ld and V,
+    // and update uses of Ld's output chain to use the TokenFactor.
+    if (Ld->hasAnyUseOfValue(1)) {
+      SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                                     SDValue(Ld, 1), SDValue(V.getNode(), 1));
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
+      DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
+                             SDValue(V.getNode(), 1));
+    }
   } else if (!BroadcastFromReg) {
     // We can't broadcast from a vector register.
     return SDValue();

Modified: llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll?rev=286251&r1=286250&r2=286251&view=diff
==============================================================================
--- llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/branches/release_39/test/CodeGen/X86/avx-vbroadcast.ll Tue Nov  8 11:01:05 2016
@@ -548,38 +548,40 @@ define <4 x double> @splat_concat4(doubl
 }
 
 ;
-; FIXME: When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
+; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
 ;
 define float @broadcast_lifetime() nounwind {
 ; X32-LABEL: broadcast_lifetime:
 ; X32:       ## BB#0:
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    subl $40, %esp
+; X32-NEXT:    subl $56, %esp
 ; X32-NEXT:    leal {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movl %esi, (%esp)
 ; X32-NEXT:    calll _gfunc
+; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp) ## 16-byte Spill
 ; X32-NEXT:    movl %esi, (%esp)
 ; X32-NEXT:    calll _gfunc
 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
-; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; X32-NEXT:    vsubss %xmm0, %xmm1, %xmm0
+; X32-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
 ; X32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    flds {{[0-9]+}}(%esp)
-; X32-NEXT:    addl $40, %esp
+; X32-NEXT:    addl $56, %esp
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_lifetime:
 ; X64:       ## BB#0:
-; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    subq $40, %rsp
 ; X64-NEXT:    leaq (%rsp), %rdi
 ; X64-NEXT:    callq _gfunc
+; X64-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %xmm0
+; X64-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
 ; X64-NEXT:    leaq (%rsp), %rdi
 ; X64-NEXT:    callq _gfunc
 ; X64-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %xmm0
-; X64-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %xmm1
-; X64-NEXT:    vsubss %xmm0, %xmm1, %xmm0
-; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    vsubss {{[0-9]+}}(%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
+; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
   %1 = alloca <4 x float>, align 16
   %2 = alloca <4 x float>, align 16




More information about the llvm-branch-commits mailing list