[llvm] r343271 - [ScalarizeMaskedMemIntrin] When expanding masked loads, start with the passthru value and insert each conditional load result over their element.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 27 14:28:53 PDT 2018


Author: ctopper
Date: Thu Sep 27 14:28:52 2018
New Revision: 343271

URL: http://llvm.org/viewvc/llvm-project?rev=343271&view=rev
Log:
[ScalarizeMaskedMemIntrin] When expanding masked loads, start with the passthru value and insert each conditional load result over their element.

Previously we started with undef and did one final merge at the end with a select.

Modified:
    llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
    llvm/trunk/test/CodeGen/X86/masked_memop.ll
    llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll

Modified: llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp?rev=343271&r1=343270&r2=343271&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp Thu Sep 27 14:28:52 2018
@@ -90,7 +90,7 @@ FunctionPass *llvm::createScalarizeMaske
 // cond.load:                                        ; preds = %0
 //  %3 = getelementptr i32* %1, i32 0
 //  %4 = load i32* %3
-//  %5 = insertelement <16 x i32> undef, i32 %4, i32 0
+//  %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
 //  br label %else
 //
 // else:                                             ; preds = %0, %cond.load
@@ -146,10 +146,8 @@ static void scalarizeMaskedLoad(CallInst
   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
   unsigned VectorWidth = VecType->getNumElements();
 
-  Value *UndefVal = UndefValue::get(VecType);
-
   // The result vector
-  Value *VResult = UndefVal;
+  Value *VResult = Src0;
 
   if (isa<Constant>(Mask)) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
@@ -161,15 +159,11 @@ static void scalarizeMaskedLoad(CallInst
       VResult =
           Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
     }
-    Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
-    CI->replaceAllUsesWith(NewI);
+    CI->replaceAllUsesWith(VResult);
     CI->eraseFromParent();
     return;
   }
 
-  PHINode *Phi = nullptr;
-  Value *PrevPhi = UndefVal;
-
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
     //
@@ -177,13 +171,6 @@ static void scalarizeMaskedLoad(CallInst
     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
     //  br i1 %mask_1, label %cond.load, label %else
     //
-    if (Idx > 0) {
-      Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
-      Phi->addIncoming(VResult, CondBlock);
-      Phi->addIncoming(PrevPhi, PrevIfBlock);
-      PrevPhi = Phi;
-      VResult = Phi;
-    }
 
     Value *Predicate =
         Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
@@ -200,7 +187,8 @@ static void scalarizeMaskedLoad(CallInst
     Value *Gep =
         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
     LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
-    VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+    Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+                                                    Builder.getInt32(Idx));
 
     // Create "else" block, fill it in the next iteration
     BasicBlock *NewIfBlock =
@@ -211,13 +199,15 @@ static void scalarizeMaskedLoad(CallInst
     OldBr->eraseFromParent();
     PrevIfBlock = IfBlock;
     IfBlock = NewIfBlock;
+
+    // Create the phi to join the new and previous value.
+    PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+    Phi->addIncoming(NewVResult, CondBlock);
+    Phi->addIncoming(VResult, PrevIfBlock);
+    VResult = Phi;
   }
 
-  Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
-  Phi->addIncoming(VResult, CondBlock);
-  Phi->addIncoming(PrevPhi, PrevIfBlock);
-  Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
-  CI->replaceAllUsesWith(NewI);
+  CI->replaceAllUsesWith(VResult);
   CI->eraseFromParent();
 }
 

Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=343271&r1=343270&r2=343271&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Thu Sep 27 14:28:52 2018
@@ -12,50 +12,20 @@ define <1 x double> @loadv1(<1 x i64> %t
 ; AVX-LABEL: loadv1:
 ; AVX:       ## %bb.0:
 ; AVX-NEXT:    testq %rdi, %rdi
-; AVX-NEXT:    ## implicit-def: $xmm1
-; AVX-NEXT:    je LBB0_1
-; AVX-NEXT:  ## %bb.2: ## %else
-; AVX-NEXT:    testq %rdi, %rdi
-; AVX-NEXT:    jne LBB0_3
-; AVX-NEXT:  LBB0_4: ## %else
-; AVX-NEXT:    vmovaps %xmm1, %xmm0
-; AVX-NEXT:    retq
-; AVX-NEXT:  LBB0_1: ## %cond.load
-; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    testq %rdi, %rdi
-; AVX-NEXT:    je LBB0_4
-; AVX-NEXT:  LBB0_3: ## %else
-; AVX-NEXT:    vmovaps %xmm0, %xmm1
-; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:    jne LBB0_2
+; AVX-NEXT:  ## %bb.1: ## %cond.load
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:  LBB0_2: ## %else
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: loadv1:
-; AVX512F:       ## %bb.0:
-; AVX512F-NEXT:    testq %rdi, %rdi
-; AVX512F-NEXT:    ## implicit-def: $xmm1
-; AVX512F-NEXT:    jne LBB0_2
-; AVX512F-NEXT:  ## %bb.1: ## %cond.load
-; AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512F-NEXT:  LBB0_2: ## %else
-; AVX512F-NEXT:    testq %rdi, %rdi
-; AVX512F-NEXT:    sete %al
-; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
-; AVX512F-NEXT:    retq
-;
-; SKX-LABEL: loadv1:
-; SKX:       ## %bb.0:
-; SKX-NEXT:    testq %rdi, %rdi
-; SKX-NEXT:    ## implicit-def: $xmm1
-; SKX-NEXT:    jne LBB0_2
-; SKX-NEXT:  ## %bb.1: ## %cond.load
-; SKX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; SKX-NEXT:  LBB0_2: ## %else
-; SKX-NEXT:    testq %rdi, %rdi
-; SKX-NEXT:    sete %al
-; SKX-NEXT:    kmovd %eax, %k1
-; SKX-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
-; SKX-NEXT:    retq
+; AVX512-LABEL: loadv1:
+; AVX512:       ## %bb.0:
+; AVX512-NEXT:    testq %rdi, %rdi
+; AVX512-NEXT:    jne LBB0_2
+; AVX512-NEXT:  ## %bb.1: ## %cond.load
+; AVX512-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512-NEXT:  LBB0_2: ## %else
+; AVX512-NEXT:    retq
   %mask = icmp eq <1 x i64> %trigger, zeroinitializer
   %res = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* %addr, i32 4, <1 x i1>%mask, <1 x double>%dst)
   ret <1 x double> %res

Modified: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll?rev=343271&r1=343270&r2=343271&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll (original)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll Thu Sep 27 14:28:52 2018
@@ -9,10 +9,10 @@ define <2 x i64> @scalarize_v2i64(<2 x i
 ; CHECK:       cond.load:
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP4]], i32 0
 ; CHECK-NEXT:    br label [[ELSE]]
 ; CHECK:       else:
-; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
 ; CHECK:       cond.load1:
@@ -21,9 +21,8 @@ define <2 x i64> @scalarize_v2i64(<2 x i
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP8]], i32 1
 ; CHECK-NEXT:    br label [[ELSE2]]
 ; CHECK:       else2:
-; CHECK-NEXT:    [[RES_PHI_SELECT:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[MASK]], <2 x i64> [[RES_PHI_SELECT]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <2 x i64> [[TMP10]]
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
 ;
   %ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru)
   ret <2 x i64> %ret
@@ -41,8 +40,7 @@ define <2 x i64> @scalarize_v2i64_ones_m
 define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64>* %p, <2 x i64> %passthru) {
 ; CHECK-LABEL: @scalarize_v2i64_zero_mask(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> zeroinitializer, <2 x i64> undef, <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+; CHECK-NEXT:    ret <2 x i64> [[PASSTHRU:%.*]]
 ;
   %ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
   ret <2 x i64> %ret
@@ -53,9 +51,8 @@ define <2 x i64> @scalarize_v2i64_const_
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[TMP4]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP3]], i32 1
+; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
 ;
   %ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
   ret <2 x i64> %ret




More information about the llvm-commits mailing list