[llvm] r338085 - [SelectionDAGBuilder] Add masked loads to PendingLoads rather than calling DAG.setRoot.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 26 16:22:11 PDT 2018


Author: ctopper
Date: Thu Jul 26 16:22:11 2018
New Revision: 338085

URL: http://llvm.org/viewvc/llvm-project?rev=338085&view=rev
Log:
[SelectionDAGBuilder] Add masked loads to PendingLoads rather than calling DAG.setRoot.

Masked loads were calling DAG.getRoot rather than SelectionDAGBuilder::getRoot, which means that PendingLoads wasn't emptied to update the root and create any needed TokenFactor. So it would be incorrect to call setRoot for the masked load.

This patch instead adds the masked load to PendingLoads so that the root doesn't get updated until a store, a scatter, or something similar happens. Alternatively, we could call SelectionDAGBuilder::getRoot before it, but that would create unnecessary serialization.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll
    llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll
    llvm/trunk/test/CodeGen/X86/masked_memop.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=338085&r1=338084&r2=338085&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Jul 26 16:22:11 2018
@@ -4059,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoa
 
   SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
                                    ISD::NON_EXTLOAD, IsExpanding);
-  if (AddToChain) {
-    SDValue OutChain = Load.getValue(1);
-    DAG.setRoot(OutChain);
-  }
+  if (AddToChain)
+    PendingLoads.push_back(Load.getValue(1));
   setValue(&I, Load);
 }
 

Modified: llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll?rev=338085&r1=338084&r2=338085&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-bugfix-26264.ll Thu Jul 26 16:22:11 2018
@@ -7,13 +7,12 @@ define <32 x double> @test_load_32f64(<3
 ; AVX512BW-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k1
 ; AVX512BW-NEXT:    vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
-; AVX512BW-NEXT:    kshiftrd $16, %k1, %k2
-; AVX512BW-NEXT:    vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
+; AVX512BW-NEXT:    kshiftrw $8, %k1, %k2
+; AVX512BW-NEXT:    vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2}
+; AVX512BW-NEXT:    kshiftrd $16, %k1, %k1
+; AVX512BW-NEXT:    vblendmpd 128(%rdi), %zmm3, %zmm2 {%k1}
 ; AVX512BW-NEXT:    kshiftrw $8, %k1, %k1
-; AVX512BW-NEXT:    vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
-; AVX512BW-NEXT:    kshiftrw $8, %k2, %k1
 ; AVX512BW-NEXT:    vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
-; AVX512BW-NEXT:    vmovapd %zmm5, %zmm2
 ; AVX512BW-NEXT:    retq
   %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
   ret <32 x double> %res
@@ -25,13 +24,12 @@ define <32 x i64> @test_load_32i64(<32 x
 ; AVX512BW-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k1
 ; AVX512BW-NEXT:    vpblendmq (%rdi), %zmm1, %zmm0 {%k1}
-; AVX512BW-NEXT:    kshiftrd $16, %k1, %k2
-; AVX512BW-NEXT:    vpblendmq 128(%rdi), %zmm3, %zmm5 {%k2}
+; AVX512BW-NEXT:    kshiftrw $8, %k1, %k2
+; AVX512BW-NEXT:    vpblendmq 64(%rdi), %zmm2, %zmm1 {%k2}
+; AVX512BW-NEXT:    kshiftrd $16, %k1, %k1
+; AVX512BW-NEXT:    vpblendmq 128(%rdi), %zmm3, %zmm2 {%k1}
 ; AVX512BW-NEXT:    kshiftrw $8, %k1, %k1
-; AVX512BW-NEXT:    vpblendmq 64(%rdi), %zmm2, %zmm1 {%k1}
-; AVX512BW-NEXT:    kshiftrw $8, %k2, %k1
 ; AVX512BW-NEXT:    vpblendmq 192(%rdi), %zmm4, %zmm3 {%k1}
-; AVX512BW-NEXT:    vmovdqa64 %zmm5, %zmm2
 ; AVX512BW-NEXT:    retq
   %res = call <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
   ret <32 x i64> %res

Modified: llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll?rev=338085&r1=338084&r2=338085&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll Thu Jul 26 16:22:11 2018
@@ -94,10 +94,10 @@ declare <16 x i32*> @llvm.masked.load.v1
 define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
 ; AVX512-LABEL: test23:
 ; AVX512:       ## %bb.0:
-; AVX512-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; AVX512-NEXT:    vptestnmq %zmm1, %zmm1, %k2
-; AVX512-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
-; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vptestnmq %zmm1, %zmm1, %k1
+; AVX512-NEXT:    vptestnmq %zmm0, %zmm0, %k2
+; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k2} {z}
+; AVX512-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = icmp eq <16 x i32*> %trigger, zeroinitializer
   %res = call <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
@@ -234,19 +234,19 @@ declare <16 x double> @llvm.masked.load.
 define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0)  {
 ; AVX512F-LABEL: test_load_32f64:
 ; AVX512F:       ## %bb.0:
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm5
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm5
+; AVX512F-NEXT:    vpmovsxbd %xmm5, %zmm5
 ; AVX512F-NEXT:    vpslld $31, %zmm5, %zmm5
 ; AVX512F-NEXT:    vptestmd %zmm5, %zmm5, %k1
-; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
-; AVX512F-NEXT:    vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
-; AVX512F-NEXT:    vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT:    vblendmpd (%rdi), %zmm1, %zmm0 {%k2}
+; AVX512F-NEXT:    vblendmpd 128(%rdi), %zmm3, %zmm5 {%k1}
 ; AVX512F-NEXT:    kshiftrw $8, %k2, %k2
-; AVX512F-NEXT:    vblendmpd 192(%rdi), %zmm4, %zmm3 {%k2}
+; AVX512F-NEXT:    vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2}
 ; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
-; AVX512F-NEXT:    vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
+; AVX512F-NEXT:    vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
 ; AVX512F-NEXT:    vmovapd %zmm5, %zmm2
 ; AVX512F-NEXT:    retq
 ;
@@ -255,13 +255,12 @@ define <32 x double> @test_load_32f64(<3
 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; SKX-NEXT:    vpmovb2m %ymm0, %k1
 ; SKX-NEXT:    vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
-; SKX-NEXT:    kshiftrd $16, %k1, %k2
-; SKX-NEXT:    vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
+; SKX-NEXT:    kshiftrw $8, %k1, %k2
+; SKX-NEXT:    vblendmpd 64(%rdi), %zmm2, %zmm1 {%k2}
+; SKX-NEXT:    kshiftrd $16, %k1, %k1
+; SKX-NEXT:    vblendmpd 128(%rdi), %zmm3, %zmm2 {%k1}
 ; SKX-NEXT:    kshiftrw $8, %k1, %k1
-; SKX-NEXT:    vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
-; SKX-NEXT:    kshiftrw $8, %k2, %k1
 ; SKX-NEXT:    vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
-; SKX-NEXT:    vmovapd %zmm5, %zmm2
 ; SKX-NEXT:    retq
   %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
   ret <32 x double> %res

Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=338085&r1=338084&r2=338085&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Thu Jul 26 16:22:11 2018
@@ -976,8 +976,8 @@ define <4 x i64> @mload_constmask_v4i64(
 define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) {
 ; AVX-LABEL: mload_constmask_v8f64:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7]
 ; AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6,7]
+; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],mem[6,7]
 ; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: mload_constmask_v8f64:




More information about the llvm-commits mailing list