[llvm] r367715 - [ScalarizeMaskedMemIntrin] Add constant mask support to expandload and compressstore scalarization

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 2 13:04:34 PDT 2019


Author: ctopper
Date: Fri Aug  2 13:04:34 2019
New Revision: 367715

URL: http://llvm.org/viewvc/llvm-project?rev=367715&view=rev
Log:
[ScalarizeMaskedMemIntrin] Add constant mask support to expandload and compressstore scalarization

This adds support for emitting all of the loads or stores for a constant mask as straight-line code in a single basic block, with no conditional branches.

Differential Revision: https://reviews.llvm.org/D65613
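
For illustration, the new fast path turns a constant-mask expandload such as

  %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)

into branch-free IR that loads only the enabled lanes from consecutive memory slots (this mirrors the updated expand-masked-expandload.ll test below; the value names here are illustrative):

  %gep = getelementptr inbounds i64, i64* %p, i32 0
  %load1 = load i64, i64* %gep, align 1
  %res1 = insertelement <2 x i64> %passthru, i64 %load1, i64 1
  ret <2 x i64> %res1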

Modified:
    llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
    llvm/trunk/test/CodeGen/X86/masked_compressstore.ll
    llvm/trunk/test/CodeGen/X86/masked_expandload.ll
    llvm/trunk/test/CodeGen/X86/pr39666.ll
    llvm/trunk/test/CodeGen/X86/pr40994.ll
    llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll
    llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll

Modified: llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp?rev=367715&r1=367714&r2=367715&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp Fri Aug  2 13:04:34 2019
@@ -616,6 +616,24 @@ static void scalarizeMaskedExpandLoad(Ca
   // The result vector
   Value *VResult = PassThru;
 
+  // Take a shortcut if the mask is a vector of constants.
+  if (isConstantIntVector(Mask)) {
+    unsigned MemIndex = 0;
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+        continue;
+      Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+      LoadInst *Load =
+          Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx));
+      VResult =
+          Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
+      ++MemIndex;
+    }
+    CI->replaceAllUsesWith(VResult);
+    CI->eraseFromParent();
+    return;
+  }
+
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
     //
@@ -694,6 +712,22 @@ static void scalarizeMaskedCompressStore
 
   unsigned VectorWidth = VecType->getNumElements();
 
+  // Take a shortcut if the mask is a vector of constants.
+  if (isConstantIntVector(Mask)) {
+    unsigned MemIndex = 0;
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+        continue;
+      Value *OneElt =
+          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+      Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+      Builder.CreateAlignedStore(OneElt, NewPtr, 1);
+      ++MemIndex;
+    }
+    CI->eraseFromParent();
+    return;
+  }
+
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
     //

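Likewise, a compressstore with the constant mask <i1 false, i1 true> now scalarizes to a single extract and store, with the store offset tracked by MemIndex instead of a pointer PHI across blocks (matching the updated expand-masked-compressstore.ll test below; value names are illustrative):

  %elt1 = extractelement <2 x i64> %data, i64 1
  %gep = getelementptr inbounds i64, i64* %p, i32 0
  store i64 %elt1, i64* %gep, align 1
  ret void
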
Modified: llvm/trunk/test/CodeGen/X86/masked_compressstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_compressstore.ll?rev=367715&r1=367714&r2=367715&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_compressstore.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_compressstore.ll Fri Aug  2 13:04:34 2019
@@ -1123,7 +1123,7 @@ define void @compressstore_v8f32_v8i1(fl
 
 define void @compressstore_v16f32_const(float* %base, <16 x float> %V) {
 ; SSE2-LABEL: compressstore_v16f32_const:
-; SSE2:       ## %bb.0: ## %cond.store
+; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    movss %xmm0, (%rdi)
 ; SSE2-NEXT:    movaps %xmm0, %xmm4
 ; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
@@ -1160,7 +1160,7 @@ define void @compressstore_v16f32_const(
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: compressstore_v16f32_const:
-; SSE42:       ## %bb.0: ## %cond.store
+; SSE42:       ## %bb.0:
 ; SSE42-NEXT:    movups %xmm0, (%rdi)
 ; SSE42-NEXT:    movups %xmm1, 16(%rdi)
 ; SSE42-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
@@ -1171,7 +1171,7 @@ define void @compressstore_v16f32_const(
 ; SSE42-NEXT:    retq
 ;
 ; AVX1-LABEL: compressstore_v16f32_const:
-; AVX1:       ## %bb.0: ## %cond.store
+; AVX1:       ## %bb.0:
 ; AVX1-NEXT:    vmovups %ymm0, (%rdi)
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
@@ -1183,7 +1183,7 @@ define void @compressstore_v16f32_const(
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: compressstore_v16f32_const:
-; AVX2:       ## %bb.0: ## %cond.store
+; AVX2:       ## %bb.0:
 ; AVX2-NEXT:    vmovups %ymm0, (%rdi)
 ; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,1,2,4]
 ; AVX2-NEXT:    vpermps %ymm1, %ymm0, %ymm0

Modified: llvm/trunk/test/CodeGen/X86/masked_expandload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_expandload.ll?rev=367715&r1=367714&r2=367715&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_expandload.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_expandload.ll Fri Aug  2 13:04:34 2019
@@ -1368,7 +1368,7 @@ define <2 x float> @expandload_v2f32_v2i
 
 define <4 x float> @expandload_v4f32_const(float* %base, <4 x float> %src0) {
 ; SSE2-LABEL: expandload_v4f32_const:
-; SSE2:       ## %bb.0: ## %cond.load
+; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[0,0]
@@ -1379,7 +1379,7 @@ define <4 x float> @expandload_v4f32_con
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: expandload_v4f32_const:
-; SSE42:       ## %bb.0: ## %cond.load
+; SSE42:       ## %bb.0:
 ; SSE42-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE42-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
@@ -1387,7 +1387,7 @@ define <4 x float> @expandload_v4f32_con
 ; SSE42-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: expandload_v4f32_const:
-; AVX1OR2:       ## %bb.0: ## %cond.load
+; AVX1OR2:       ## %bb.0:
 ; AVX1OR2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX1OR2-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; AVX1OR2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
@@ -1423,7 +1423,7 @@ define <4 x float> @expandload_v4f32_con
 
 define <16 x float> @expandload_v16f32_const(float* %base, <16 x float> %src0) {
 ; SSE2-LABEL: expandload_v16f32_const:
-; SSE2:       ## %bb.0: ## %cond.load
+; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    movups (%rdi), %xmm0
 ; SSE2-NEXT:    movups 16(%rdi), %xmm1
 ; SSE2-NEXT:    movss {{.*#+}} xmm5 = mem[0],zero,zero,zero
@@ -1443,7 +1443,7 @@ define <16 x float> @expandload_v16f32_c
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: expandload_v16f32_const:
-; SSE42:       ## %bb.0: ## %cond.load
+; SSE42:       ## %bb.0:
 ; SSE42-NEXT:    movups (%rdi), %xmm0
 ; SSE42-NEXT:    movups 16(%rdi), %xmm1
 ; SSE42-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
@@ -1457,7 +1457,7 @@ define <16 x float> @expandload_v16f32_c
 ; SSE42-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: expandload_v16f32_const:
-; AVX1OR2:       ## %bb.0: ## %cond.load
+; AVX1OR2:       ## %bb.0:
 ; AVX1OR2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
 ; AVX1OR2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
@@ -1507,7 +1507,7 @@ define <16 x float> @expandload_v16f32_c
 
 define <16 x float> @expandload_v16f32_const_undef(float* %base) {
 ; SSE2-LABEL: expandload_v16f32_const_undef:
-; SSE2:       ## %bb.0: ## %cond.load
+; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
@@ -1517,7 +1517,7 @@ define <16 x float> @expandload_v16f32_c
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: expandload_v16f32_const_undef:
-; SSE42:       ## %bb.0: ## %cond.load
+; SSE42:       ## %bb.0:
 ; SSE42-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
 ; SSE42-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; SSE42-NEXT:    movups (%rdi), %xmm0
@@ -1526,7 +1526,7 @@ define <16 x float> @expandload_v16f32_c
 ; SSE42-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: expandload_v16f32_const_undef:
-; AVX1OR2:       ## %bb.0: ## %cond.load
+; AVX1OR2:       ## %bb.0:
 ; AVX1OR2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX1OR2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
 ; AVX1OR2-NEXT:    vinsertf128 $1, 44(%rdi), %ymm0, %ymm1
@@ -2991,18 +2991,18 @@ define <32 x float> @expandload_v32f32_v
 
 define <2 x i64> @expandload_v2i64_const(i64* %base, <2 x i64> %src0) {
 ; SSE2-LABEL: expandload_v2i64_const:
-; SSE2:       ## %bb.0: ## %else
+; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: expandload_v2i64_const:
-; SSE42:       ## %bb.0: ## %else
+; SSE42:       ## %bb.0:
 ; SSE42-NEXT:    pinsrq $1, (%rdi), %xmm0
 ; SSE42-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: expandload_v2i64_const:
-; AVX1OR2:       ## %bb.0: ## %else
+; AVX1OR2:       ## %bb.0:
 ; AVX1OR2-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm0
 ; AVX1OR2-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/pr39666.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr39666.ll?rev=367715&r1=367714&r2=367715&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr39666.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr39666.ll Fri Aug  2 13:04:34 2019
@@ -3,7 +3,7 @@
 
 define <2 x i64> @test5(i64* %base, <2 x i64> %src0) {
 ; CHECK-LABEL: test5:
-; CHECK:       # %bb.0: # %else
+; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpinsrq $1, (%rdi), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.masked.expandload.v2i64(i64* %base, <2 x i1> <i1 false, i1 true>, <2 x i64> %src0)

Modified: llvm/trunk/test/CodeGen/X86/pr40994.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr40994.ll?rev=367715&r1=367714&r2=367715&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr40994.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr40994.ll Fri Aug  2 13:04:34 2019
@@ -3,7 +3,7 @@
 
 define <8 x i8> @foo(<16 x i8> %a) {
 ; CHECK-LABEL: foo:
-; CHECK:       # %bb.0: # %cond.store
+; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pextrb $0, %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    pextrb $2, %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    pextrb $4, %xmm0, -{{[0-9]+}}(%rsp)

Modified: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll?rev=367715&r1=367714&r2=367715&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll (original)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll Fri Aug  2 13:04:34 2019
@@ -27,20 +27,12 @@ define void @scalarize_v2i64(i64* %p, <2
 
 define void @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %data) {
 ; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT:    br i1 true, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK:       cond.store:
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT:    store i64 [[TMP1]], i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT:    store i64 [[ELT0]], i64* [[TMP1]], align 1
+; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT:    br label [[ELSE]]
-; CHECK:       else:
-; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT:    br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK:       cond.store1:
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT:    store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT:    br label [[ELSE2]]
-; CHECK:       else2:
+; CHECK-NEXT:    store i64 [[ELT1]], i64* [[TMP2]], align 1
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 true, i1 true>)
@@ -49,20 +41,6 @@ define void @scalarize_v2i64_ones_mask(i
 
 define void @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %data) {
 ; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT:    br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK:       cond.store:
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT:    store i64 [[TMP1]], i64* [[P:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT:    br label [[ELSE]]
-; CHECK:       else:
-; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT:    br i1 false, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK:       cond.store1:
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT:    store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT:    br label [[ELSE2]]
-; CHECK:       else2:
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 false>)
@@ -71,20 +49,9 @@ define void @scalarize_v2i64_zero_mask(i
 
 define void @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %data) {
 ; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT:    br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK:       cond.store:
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT:    store i64 [[TMP1]], i64* [[P:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT:    br label [[ELSE]]
-; CHECK:       else:
-; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT:    br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK:       cond.store1:
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT:    store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT:    br label [[ELSE2]]
-; CHECK:       else2:
+; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT:    store i64 [[ELT1]], i64* [[TMP1]], align 1
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 true>)

Modified: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll?rev=367715&r1=367714&r2=367715&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll (original)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll Fri Aug  2 13:04:34 2019
@@ -29,23 +29,13 @@ define <2 x i64> @scalarize_v2i64(i64* %
 
 define <2 x i64> @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %passthru) {
 ; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT:    br i1 true, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK:       cond.load:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT:    br label [[ELSE]]
-; CHECK:       else:
-; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT:    br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK:       cond.load1:
-; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT:    br label [[ELSE2]]
-; CHECK:       else2:
-; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT:    [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[TMP2]], align 1
+; CHECK-NEXT:    [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT:    ret <2 x i64> [[RES1]]
 ;
   %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
   ret <2 x i64> %ret
@@ -53,23 +43,7 @@ define <2 x i64> @scalarize_v2i64_ones_m
 
 define <2 x i64> @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %passthru) {
 ; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT:    br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK:       cond.load:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT:    br label [[ELSE]]
-; CHECK:       else:
-; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT:    br i1 false, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK:       cond.load1:
-; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT:    br label [[ELSE2]]
-; CHECK:       else2:
-; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT:    ret <2 x i64> [[PASSTHRU:%.*]]
 ;
   %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
   ret <2 x i64> %ret
@@ -77,23 +51,10 @@ define <2 x i64> @scalarize_v2i64_zero_m
 
 define <2 x i64> @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %passthru) {
 ; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT:    br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK:       cond.load:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT:    br label [[ELSE]]
-; CHECK:       else:
-; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT:    br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK:       cond.load1:
-; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT:    br label [[ELSE2]]
-; CHECK:       else2:
-; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT:    [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT:    ret <2 x i64> [[RES1]]
 ;
   %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
   ret <2 x i64> %ret
