[llvm] r343273 - [ScalarizeMaskedMemIntrin] When expanding masked gathers, start with the passthru vector and insert the new load results into it.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 27 14:28:59 PDT 2018
Author: ctopper
Date: Thu Sep 27 14:28:59 2018
New Revision: 343273
URL: http://llvm.org/viewvc/llvm-project?rev=343273&view=rev
Log:
[ScalarizeMaskedMemIntrin] When expanding masked gathers, start with the passthru vector and insert the new load results into it.
Previously we started with undef and did a final select against the passthru at the end. Starting from the passthru instead makes that trailing select unnecessary and avoids feeding undef through the phi chain.
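For a variable-mask <2 x i64> gather this gives an expansion of the following shape (a minimal sketch; value and block names follow the updated CHECK lines in expand-masked-gather.ll below, with %0 standing for the unnamed entry block):

  %Mask0 = extractelement <2 x i1> %mask, i32 0
  br i1 %Mask0, label %cond.load, label %else

cond.load:
  %Ptr0 = extractelement <2 x i64*> %p, i32 0
  %Load0 = load i64, i64* %Ptr0, align 8
  ; insert into the passthru rather than into undef
  %Res0 = insertelement <2 x i64> %passthru, i64 %Load0, i32 0
  br label %else

else:
  ; on the not-taken path the passthru flows through unchanged,
  ; so no final select against %passthru is needed
  %Res.phi.else = phi <2 x i64> [ %Res0, %cond.load ], [ %passthru, %0 ]
  ...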
Modified:
llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll
Modified: llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp?rev=343273&r1=343272&r2=343273&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp Thu Sep 27 14:28:59 2018
@@ -368,10 +368,8 @@ static void scalarizeMaskedGather(CallIn
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- Value *UndefVal = UndefValue::get(VecType);
-
// The result vector
- Value *VResult = UndefVal;
+ Value *VResult = Src0;
unsigned VectorWidth = VecType->getNumElements();
// Shorten the way if the mask is a vector of constants.
@@ -386,28 +384,17 @@ static void scalarizeMaskedGather(CallIn
VResult = Builder.CreateInsertElement(
VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
}
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
return;
}
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.load, label %else
//
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
"Mask" + Twine(Idx));
@@ -425,8 +412,9 @@ static void scalarizeMaskedGather(CallIn
"Ptr" + Twine(Idx));
LoadInst *Load =
Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
- "Res" + Twine(Idx));
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
@@ -436,13 +424,14 @@ static void scalarizeMaskedGather(CallIn
OldBr->eraseFromParent();
PrevIfBlock = IfBlock;
IfBlock = NewIfBlock;
+
+ PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(NewVResult, CondBlock);
+ Phi->addIncoming(VResult, PrevIfBlock);
+ VResult = Phi;
}
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
+ CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
}
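The constant-mask shortcut benefits in the same way: with an all-ones mask the passthru now serves directly as the insertion base, so the expansion is a straight-line chain with no trailing select. A minimal sketch for <2 x i64> (shape matching the scalarize_v2i64_ones_mask expectations below):

  %Ptr0 = extractelement <2 x i64*> %p, i32 0
  %Load0 = load i64, i64* %Ptr0, align 8
  ; each lane inserts into the previous result, starting from the passthru
  %Res0 = insertelement <2 x i64> %passthru, i64 %Load0, i32 0
  %Ptr1 = extractelement <2 x i64*> %p, i32 1
  %Load1 = load i64, i64* %Ptr1, align 8
  %Res1 = insertelement <2 x i64> %Res0, i64 %Load1, i32 1
  ret <2 x i64> %Res1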
Modified: llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll?rev=343273&r1=343272&r2=343273&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-masked-gather.ll Thu Sep 27 14:28:59 2018
@@ -30,25 +30,24 @@ define <2 x i32> @masked_gather_v2i32(<2
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB0_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: movl (%rax), %eax
+; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB0_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB0_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
-; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2
+; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB0_4: # %else2
-; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
-; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x i32*>, <2 x i32*>* %ptr
@@ -80,26 +79,24 @@ define <4 x i32> @masked_gather_v2i32_co
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB1_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: movl (%rax), %eax
+; NOGATHER-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB1_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB1_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT: movl (%rax), %eax
-; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm2, %xmm2
+; NOGATHER-NEXT: vpinsrq $1, %rax, %xmm1, %xmm1
; NOGATHER-NEXT: .LBB1_4: # %else2
-; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
-; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
-; NOGATHER-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x i32*>, <2 x i32*>* %ptr
@@ -132,25 +129,23 @@ define <2 x float> @masked_gather_v2floa
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB2_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT: .LBB2_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB2_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: .LBB2_4: # %else2
-; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x float*>, <2 x float*>* %ptr
@@ -180,25 +175,23 @@ define <4 x float> @masked_gather_v2floa
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB3_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3]
; NOGATHER-NEXT: .LBB3_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB3_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT: .LBB3_4: # %else2
-; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; NOGATHER-NEXT: vmovaps %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x float*>, <2 x float*>* %ptr
@@ -229,27 +222,26 @@ define <4 x i32> @masked_gather_v4i32(<4
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm3
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm0, %rax
-; NOGATHER-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
-; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
+; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB4_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
-; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
+; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
@@ -257,10 +249,9 @@ define <4 x i32> @masked_gather_v4i32(<4
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
-; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3
+; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT: .LBB4_8: # %else8
-; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0
-; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
+; NOGATHER-NEXT: vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
entry:
@@ -289,27 +280,27 @@ define <4 x float> @masked_gather_v4floa
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER: # %bb.0: # %entry
; NOGATHER-NEXT: vpextrb $0, %xmm1, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm3
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
; NOGATHER-NEXT: vmovq %xmm0, %rax
; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; NOGATHER-NEXT: .LBB5_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT: .LBB5_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB5_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
-; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm4
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
+; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm3
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT: .LBB5_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm1, %eax
; NOGATHER-NEXT: testb $1, %al
@@ -317,10 +308,9 @@ define <4 x float> @masked_gather_v4floa
; NOGATHER-NEXT: # %bb.7: # %cond.load7
; NOGATHER-NEXT: vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT: .LBB5_8: # %else8
-; NOGATHER-NEXT: vpslld $31, %xmm1, %xmm0
-; NOGATHER-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0
+; NOGATHER-NEXT: vmovaps %xmm2, %xmm0
; NOGATHER-NEXT: vzeroupper
; NOGATHER-NEXT: retq
entry:
@@ -357,86 +347,81 @@ define <8 x i32> @masked_gather_v8i32(<8
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
-; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
+; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm1, %xmm4
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_2: # %else
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
-; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm2, %xmm5
-; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
+; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
+; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm4
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_4: # %else2
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
-; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5
-; NOGATHER-NEXT: vmovq %xmm5, %rax
-; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm2, %xmm5
-; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
+; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
+; NOGATHER-NEXT: vmovq %xmm4, %rax
+; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm4
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_6: # %else5
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
-; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4
-; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
-; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm2, %xmm4
-; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
+; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
+; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
+; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm1, %xmm3
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB6_8: # %else8
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
-; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm4, %xmm4
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_12
; NOGATHER-NEXT: # %bb.11: # %cond.load13
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
-; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm4, %xmm4
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_12: # %else14
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_14
; NOGATHER-NEXT: # %bb.13: # %cond.load16
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
-; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm4, %xmm4
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_14: # %else17
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB6_16
; NOGATHER-NEXT: # %bb.15: # %cond.load19
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
-; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm3, %xmm3
-; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
+; NOGATHER-NEXT: vpinsrd $3, (%rax), %xmm0, %xmm0
+; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB6_16: # %else20
-; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3
-; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
-; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
+; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <8 x i32*>, <8 x i32*>* %ptr
@@ -473,87 +458,82 @@ define <8 x float> @masked_gather_v8floa
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %ymm4
-; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
+; NOGATHER-NEXT: vmovdqa 32(%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT: .LBB7_2: # %else
; NOGATHER-NEXT: vpextrb $2, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0],mem[0],xmm2[2,3]
-; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
+; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0],mem[0],xmm1[2,3]
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_4: # %else2
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
-; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm5
-; NOGATHER-NEXT: vmovq %xmm5, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm5 = xmm2[0,1],mem[0],xmm2[3]
-; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2,3],ymm2[4,5,6,7]
+; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
+; NOGATHER-NEXT: vmovq %xmm4, %rax
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3]
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_6: # %else5
; NOGATHER-NEXT: vpextrb $6, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
-; NOGATHER-NEXT: vextractf128 $1, %ymm4, %xmm4
-; NOGATHER-NEXT: vpextrq $1, %xmm4, %rax
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm2[0,1,2],mem[0]
-; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
+; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
+; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0]
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB7_8: # %else8
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_10
; NOGATHER-NEXT: # %bb.9: # %cond.load10
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm5
-; NOGATHER-NEXT: vblendps {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3]
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm4
+; NOGATHER-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3]
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_10: # %else11
; NOGATHER-NEXT: vpextrb $10, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_12
; NOGATHER-NEXT: # %bb.11: # %cond.load13
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],mem[0],xmm4[2,3]
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_12: # %else14
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_14
; NOGATHER-NEXT: # %bb.13: # %cond.load16
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],mem[0],xmm4[3]
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_14: # %else17
; NOGATHER-NEXT: vpextrb $14, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB7_16
; NOGATHER-NEXT: # %bb.15: # %cond.load19
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
-; NOGATHER-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
-; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
+; NOGATHER-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB7_16: # %else20
-; NOGATHER-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; NOGATHER-NEXT: vpslld $31, %xmm3, %xmm3
-; NOGATHER-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
-; NOGATHER-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
+; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <8 x float*>, <8 x float*>* %ptr
@@ -585,50 +565,44 @@ define <4 x i64> @masked_gather_v4i64(<4
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm3
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB8_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm4
-; NOGATHER-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7]
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm3
+; NOGATHER-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT: .LBB8_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
-; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm4, %xmm4
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm3, %xmm3
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB8_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB8_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
-; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm3, %xmm3
-; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
+; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm0, %xmm0
+; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB8_8: # %else8
-; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3
-; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0
-; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
-; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; NOGATHER-NEXT: vmovaps %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <4 x i64*>, <4 x i64*>* %ptr
@@ -660,50 +634,44 @@ define <4 x double> @masked_gather_v4dou
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %ymm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $ymm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0],ymm1[1,2,3]
; NOGATHER-NEXT: .LBB9_2: # %else
; NOGATHER-NEXT: vpextrb $4, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm4 = xmm2[0],mem[0]
-; NOGATHER-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3]
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm1[0],mem[0]
+; NOGATHER-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3]
; NOGATHER-NEXT: .LBB9_4: # %else2
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_6
; NOGATHER-NEXT: # %bb.5: # %cond.load4
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm4
-; NOGATHER-NEXT: vmovq %xmm4, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm4
-; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm4 = mem[0],xmm4[1]
-; NOGATHER-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT: vmovq %xmm3, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm3
+; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm3 = mem[0],xmm3[1]
+; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB9_6: # %else5
; NOGATHER-NEXT: vpextrb $12, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB9_8
; NOGATHER-NEXT: # %bb.7: # %cond.load7
-; NOGATHER-NEXT: vextractf128 $1, %ymm3, %xmm3
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm3
-; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm3 = xmm3[0],mem[0]
-; NOGATHER-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; NOGATHER-NEXT: vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT: vpextrq $1, %xmm0, %rax
+; NOGATHER-NEXT: vextractf128 $1, %ymm1, %xmm0
+; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT: .LBB9_8: # %else8
-; NOGATHER-NEXT: vpslld $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vpsrad $31, %xmm0, %xmm0
-; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm3
-; NOGATHER-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; NOGATHER-NEXT: vpmovsxdq %xmm0, %xmm0
-; NOGATHER-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
-; NOGATHER-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
+; NOGATHER-NEXT: vmovapd %ymm1, %ymm0
; NOGATHER-NEXT: retq
entry:
%ld = load <4 x double*>, <4 x double*>* %ptr
@@ -733,24 +701,22 @@ define <2 x i64> @masked_gather_v2i64(<2
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB10_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vpinsrq $0, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT: .LBB10_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB10_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm2, %xmm2
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT: .LBB10_4: # %else2
-; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
-; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; NOGATHER-NEXT: vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x i64*>, <2 x i64*>* %ptr
@@ -780,24 +746,22 @@ define <2 x double> @masked_gather_v2dou
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER: # %bb.0: # %entry
-; NOGATHER-NEXT: vmovdqa (%rdi), %xmm3
+; NOGATHER-NEXT: vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT: vpextrb $0, %xmm0, %eax
-; NOGATHER-NEXT: # implicit-def: $xmm2
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB11_2
; NOGATHER-NEXT: # %bb.1: # %cond.load
-; NOGATHER-NEXT: vmovq %xmm3, %rax
-; NOGATHER-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; NOGATHER-NEXT: vmovq %xmm2, %rax
+; NOGATHER-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; NOGATHER-NEXT: .LBB11_2: # %else
; NOGATHER-NEXT: vpextrb $8, %xmm0, %eax
; NOGATHER-NEXT: testb $1, %al
; NOGATHER-NEXT: je .LBB11_4
; NOGATHER-NEXT: # %bb.3: # %cond.load1
-; NOGATHER-NEXT: vpextrq $1, %xmm3, %rax
-; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
+; NOGATHER-NEXT: vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; NOGATHER-NEXT: .LBB11_4: # %else2
-; NOGATHER-NEXT: vpsllq $63, %xmm0, %xmm0
-; NOGATHER-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; NOGATHER-NEXT: vmovapd %xmm1, %xmm0
; NOGATHER-NEXT: retq
entry:
%ld = load <2 x double*>, <2 x double*>* %ptr
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=343273&r1=343272&r2=343273&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Thu Sep 27 14:28:59 2018
@@ -1658,38 +1658,35 @@ declare <3 x i32> @llvm.masked.gather.v3
define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; KNL_64-LABEL: test30:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2
-; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
-; KNL_64-NEXT: kmovw %k1, %eax
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0
+; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
-; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
+; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; KNL_64-NEXT: testb $1, %al
-; KNL_64-NEXT: # implicit-def: $xmm0
; KNL_64-NEXT: je .LBB31_2
; KNL_64-NEXT: # %bb.1: # %cond.load
-; KNL_64-NEXT: vmovq %xmm1, %rax
-; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; KNL_64-NEXT: vmovq %xmm0, %rax
+; KNL_64-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_2: # %else
-; KNL_64-NEXT: kshiftrw $1, %k1, %k0
-; KNL_64-NEXT: kmovw %k0, %eax
+; KNL_64-NEXT: kshiftrw $1, %k0, %k1
+; KNL_64-NEXT: kmovw %k1, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB31_4
; KNL_64-NEXT: # %bb.3: # %cond.load1
-; KNL_64-NEXT: vpextrq $1, %xmm1, %rax
-; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
+; KNL_64-NEXT: vpextrq $1, %xmm0, %rax
+; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_4: # %else2
-; KNL_64-NEXT: kshiftrw $2, %k1, %k0
+; KNL_64-NEXT: kshiftrw $2, %k0, %k0
; KNL_64-NEXT: kmovw %k0, %eax
; KNL_64-NEXT: testb $1, %al
; KNL_64-NEXT: je .LBB31_6
; KNL_64-NEXT: # %bb.5: # %cond.load4
-; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
-; KNL_64-NEXT: vmovq %xmm1, %rax
-; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
+; KNL_64-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL_64-NEXT: vmovq %xmm0, %rax
+; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; KNL_64-NEXT: .LBB31_6: # %else5
-; KNL_64-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1}
; KNL_64-NEXT: vmovdqa %xmm3, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
@@ -1698,37 +1695,35 @@ define <3 x i32> @test30(<3 x i32*> %bas
; KNL_32: # %bb.0:
; KNL_32-NEXT: subl $12, %esp
; KNL_32-NEXT: .cfi_def_cfa_offset 16
-; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2
-; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1
-; KNL_32-NEXT: kmovw %k1, %eax
+; KNL_32-NEXT: vmovdqa %xmm0, %xmm3
+; KNL_32-NEXT: vpslld $31, %xmm2, %xmm0
+; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL_32-NEXT: kmovw %k0, %eax
+; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
-; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
+; KNL_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; KNL_32-NEXT: testb $1, %al
-; KNL_32-NEXT: # implicit-def: $xmm1
; KNL_32-NEXT: je .LBB31_2
; KNL_32-NEXT: # %bb.1: # %cond.load
-; KNL_32-NEXT: vmovd %xmm2, %eax
-; KNL_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; KNL_32-NEXT: vmovd %xmm1, %eax
+; KNL_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_2: # %else
-; KNL_32-NEXT: kshiftrw $1, %k1, %k0
-; KNL_32-NEXT: kmovw %k0, %eax
+; KNL_32-NEXT: kshiftrw $1, %k0, %k1
+; KNL_32-NEXT: kmovw %k1, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB31_4
; KNL_32-NEXT: # %bb.3: # %cond.load1
-; KNL_32-NEXT: vpextrd $1, %xmm2, %eax
-; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
+; KNL_32-NEXT: vpextrd $1, %xmm1, %eax
+; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_4: # %else2
-; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
-; KNL_32-NEXT: kshiftrw $2, %k1, %k0
+; KNL_32-NEXT: kshiftrw $2, %k0, %k0
; KNL_32-NEXT: kmovw %k0, %eax
; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB31_6
; KNL_32-NEXT: # %bb.5: # %cond.load4
-; KNL_32-NEXT: vpextrd $2, %xmm2, %eax
-; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
+; KNL_32-NEXT: vpextrd $2, %xmm1, %eax
+; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; KNL_32-NEXT: .LBB31_6: # %else5
-; KNL_32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
-; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_32-NEXT: addl $12, %esp
; KNL_32-NEXT: .cfi_def_cfa_offset 4
; KNL_32-NEXT: vzeroupper
@@ -1737,36 +1732,34 @@ define <3 x i32> @test30(<3 x i32*> %bas
; SKX-LABEL: test30:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm2, %xmm2
-; SKX-NEXT: vpmovd2m %xmm2, %k1
-; SKX-NEXT: kmovw %k1, %eax
+; SKX-NEXT: vpmovd2m %xmm2, %k0
+; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: testb $1, %al
-; SKX-NEXT: # implicit-def: $xmm0
; SKX-NEXT: je .LBB31_2
; SKX-NEXT: # %bb.1: # %cond.load
-; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: vpinsrd $0, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_2: # %else
-; SKX-NEXT: kshiftrw $1, %k1, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kshiftrw $1, %k0, %k1
+; SKX-NEXT: kmovw %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB31_4
; SKX-NEXT: # %bb.3: # %cond.load1
-; SKX-NEXT: vpextrq $1, %xmm1, %rax
-; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
+; SKX-NEXT: vpextrq $1, %xmm0, %rax
+; SKX-NEXT: vpinsrd $1, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_4: # %else2
-; SKX-NEXT: kshiftrw $2, %k1, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: je .LBB31_6
; SKX-NEXT: # %bb.5: # %cond.load4
-; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
-; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: vpinsrd $2, (%rax), %xmm3, %xmm3
; SKX-NEXT: .LBB31_6: # %else5
-; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1}
; SKX-NEXT: vmovdqa %xmm3, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -1775,36 +1768,35 @@ define <3 x i32> @test30(<3 x i32*> %bas
; SKX_32: # %bb.0:
; SKX_32-NEXT: subl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 16
-; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
-; SKX_32-NEXT: vpmovd2m %xmm2, %k1
-; SKX_32-NEXT: kmovw %k1, %eax
+; SKX_32-NEXT: vmovdqa %xmm0, %xmm3
+; SKX_32-NEXT: vpslld $31, %xmm2, %xmm0
+; SKX_32-NEXT: vpmovd2m %xmm0, %k0
+; SKX_32-NEXT: kmovw %k0, %eax
+; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
-; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
+; SKX_32-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; SKX_32-NEXT: testb $1, %al
-; SKX_32-NEXT: # implicit-def: $xmm1
; SKX_32-NEXT: je .LBB31_2
; SKX_32-NEXT: # %bb.1: # %cond.load
-; SKX_32-NEXT: vmovd %xmm2, %eax
-; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SKX_32-NEXT: vmovd %xmm1, %eax
+; SKX_32-NEXT: vpinsrd $0, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_2: # %else
-; SKX_32-NEXT: kshiftrw $1, %k1, %k0
-; SKX_32-NEXT: kmovw %k0, %eax
+; SKX_32-NEXT: kshiftrw $1, %k0, %k1
+; SKX_32-NEXT: kmovw %k1, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB31_4
; SKX_32-NEXT: # %bb.3: # %cond.load1
-; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
-; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
+; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_4: # %else2
-; SKX_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0
-; SKX_32-NEXT: kshiftrw $2, %k1, %k0
+; SKX_32-NEXT: kshiftrw $2, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
; SKX_32-NEXT: testb $1, %al
; SKX_32-NEXT: je .LBB31_6
; SKX_32-NEXT: # %bb.5: # %cond.load4
-; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
-; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
+; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
; SKX_32-NEXT: .LBB31_6: # %else5
-; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: .cfi_def_cfa_offset 4
; SKX_32-NEXT: retl
Modified: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll?rev=343273&r1=343272&r2=343273&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll (original)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll Thu Sep 27 14:28:59 2018
@@ -8,10 +8,10 @@ define <2 x i64> @scalarize_v2i64(<2 x i
; CHECK: cond.load:
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
-; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0
+; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ undef, [[TMP0:%.*]] ]
+; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[MASK1:%.*]] = extractelement <2 x i1> [[MASK]], i32 1
; CHECK-NEXT: br i1 [[MASK1]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
; CHECK: cond.load1:
@@ -20,9 +20,8 @@ define <2 x i64> @scalarize_v2i64(<2 x i
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[LOAD1]], i32 1
; CHECK-NEXT: br label [[ELSE2]]
; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_SELECT:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[MASK]], <2 x i64> [[RES_PHI_SELECT]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -32,12 +31,11 @@ define <2 x i64> @scalarize_v2i64_ones_m
; CHECK-LABEL: @scalarize_v2i64_ones_mask(
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 0
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
-; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD0]], i32 0
+; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i32 0
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P]], i32 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 true, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -45,8 +43,7 @@ define <2 x i64> @scalarize_v2i64_ones_m
define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64*> %p, <2 x i64> %passthru) {
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i64> undef, <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -56,9 +53,8 @@ define <2 x i64> @scalarize_v2i64_const_
; CHECK-LABEL: @scalarize_v2i64_const_mask(
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x i64*> [[P:%.*]], i32 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
-; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> undef, i64 [[LOAD1]], i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[RES1]], <2 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i32 1
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
;
%ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret