[llvm] 1257315 - [Scalarizer] Regenerate test checks (NFC)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Tue May 31 08:23:23 PDT 2022


Author: Nikita Popov
Date: 2022-05-31T17:23:15+02:00
New Revision: 1257315b20d5f2487c4445c129b7437d5f2debfd

URL: https://github.com/llvm/llvm-project/commit/1257315b20d5f2487c4445c129b7437d5f2debfd
DIFF: https://github.com/llvm/llvm-project/commit/1257315b20d5f2487c4445c129b7437d5f2debfd.diff

LOG: [Scalarizer] Regenerate test checks (NFC)

Added: 
    

Modified: 
    llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
    llvm/test/Transforms/Scalarizer/basic.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
index 5eb04df3fe777..7e95ca17d706d 100644
--- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
+++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -6,57 +7,58 @@ declare <4 x float> @ext(<4 x float>)
 
 define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
 ; CHECK-LABEL: @f1(
-; CHECK: entry:
-; CHECK:   %init.i0 = extractelement <4 x float> %init, i32 0
-; CHECK:   %init.i1 = extractelement <4 x float> %init, i32 1
-; CHECK:   %init.i2 = extractelement <4 x float> %init, i32 2
-; CHECK:   %init.i3 = extractelement <4 x float> %init, i32 3
-; CHECK:   br label %loop
-; CHECK: loop:
-; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
-; CHECK:   %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
-; CHECK:   %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
-; CHECK:   %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
-; CHECK:   %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
-; CHECK:   %nexti = sub i32 %i, 1
-; CHECK:   %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
-; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
-; CHECK:   %val.i0 = load float, float* %ptr.i0, align 16
-; CHECK:   %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
-; CHECK:   %val.i1 = load float, float* %ptr.i1, align 4
-; CHECK:   %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
-; CHECK:   %val.i2 = load float, float* %ptr.i2, align 8
-; CHECK:   %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
-; CHECK:   %val.i3 = load float, float* %ptr.i3, align 4
-; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
-; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
-; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
-; CHECK:   %add.i3 = fadd float %acc.i1, %acc.i3
-; CHECK:   %add.upto0 = insertelement <4 x float> poison, float %add.i0, i32 0
-; CHECK:   %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
-; CHECK:   %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
-; CHECK:   %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
-; CHECK:   %call = call <4 x float> @ext(<4 x float> %add)
-; CHECK:   %call.i0 = extractelement <4 x float> %call, i32 0
-; CHECK:   %cmp.i0 = fcmp ogt float %call.i0, 1.0
-; CHECK:   %call.i1 = extractelement <4 x float> %call, i32 1
-; CHECK:   %cmp.i1 = fcmp ogt float %call.i1, 2.0
-; CHECK:   %call.i2 = extractelement <4 x float> %call, i32 2
-; CHECK:   %cmp.i2 = fcmp ogt float %call.i2, 3.0
-; CHECK:   %call.i3 = extractelement <4 x float> %call, i32 3
-; CHECK:   %cmp.i3 = fcmp ogt float %call.i3, 4.0
-; CHECK:   %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
-; CHECK:   %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
-; CHECK:   %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
-; CHECK:   %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
-; CHECK:   store float %sel.i0, float* %ptr.i0
-; CHECK:   store float %sel.i1, float* %ptr.i1
-; CHECK:   store float %sel.i2, float* %ptr.i2
-; CHECK:   store float %sel.i3, float* %ptr.i3
-; CHECK:   %test = icmp eq i32 %nexti, 0
-; CHECK:   br i1 %test, label %loop, label %exit
-; CHECK: exit:
-; CHECK:   ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0
+; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1
+; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2
+; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
+; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
+; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
+; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
+; CHECK-NEXT:    [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0
+; CHECK-NEXT:    [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1
+; CHECK-NEXT:    [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2
+; CHECK-NEXT:    [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3
+; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
+; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
+; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
+; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
+; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
+; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
+; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
+; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
+; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
+; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
+; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
+; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
+; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
+; CHECK-NEXT:    store float [[SEL_I0]], float* [[PTR_I0]], align 16
+; CHECK-NEXT:    store float [[SEL_I1]], float* [[PTR_I1]], align 4
+; CHECK-NEXT:    store float [[SEL_I2]], float* [[PTR_I2]], align 8
+; CHECK-NEXT:    store float [[SEL_I3]], float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
+; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -70,17 +72,17 @@ loop:
   %dval = bitcast <4 x float> %val to <2 x double>
   %dacc = bitcast <4 x float> %acc to <2 x double>
   %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
-                            <2 x i32> <i32 0, i32 2>
+  <2 x i32> <i32 0, i32 2>
   %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
-                            <2 x i32> <i32 1, i32 3>
+  <2 x i32> <i32 1, i32 3>
   %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
   %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
   %add = fadd <4 x float> %f1, %f2
   %call = call <4 x float> @ext(<4 x float> %add)
   %cmp = fcmp ogt <4 x float> %call,
-                  <float 1.0, float 2.0, float 3.0, float 4.0>
+  <float 1.0, float 2.0, float 3.0, float 4.0>
   %sel = select <4 x i1> %cmp, <4 x float> %call,
-                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
+  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
   store <4 x float> %sel, <4 x float> *%ptr
 
   %test = icmp eq i32 %nexti, 0
@@ -91,57 +93,58 @@ exit:
 }
 
 define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
-; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
-; CHECK: entry:
-; CHECK:   %init.i0 = extractelement <4 x i32> %init, i32 0
-; CHECK:   %init.i1 = extractelement <4 x i32> %init, i32 1
-; CHECK:   %init.i2 = extractelement <4 x i32> %init, i32 2
-; CHECK:   %init.i3 = extractelement <4 x i32> %init, i32 3
-; CHECK:   br label %loop
-; CHECK: loop:
-; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
-; CHECK:   %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
-; CHECK:   %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
-; CHECK:   %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
-; CHECK:   %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
-; CHECK:   %nexti = sub i32 %i, 1
-; CHECK:   %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
-; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
-; CHECK:   %val.i0 = load i8, i8* %ptr.i0, align 4
-; CHECK:   %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
-; CHECK:   %val.i1 = load i8, i8* %ptr.i1, align 1
-; CHECK:   %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
-; CHECK:   %val.i2 = load i8, i8* %ptr.i2, align 2
-; CHECK:   %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
-; CHECK:   %val.i3 = load i8, i8* %ptr.i3, align 1
-; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
-; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
-; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
-; CHECK:   %ext.i3 = sext i8 %val.i3 to i32
-; CHECK:   %add.i0 = add i32 %ext.i0, %acc.i0
-; CHECK:   %add.i1 = add i32 %ext.i1, %acc.i1
-; CHECK:   %add.i2 = add i32 %ext.i2, %acc.i2
-; CHECK:   %add.i3 = add i32 %ext.i3, %acc.i3
-; CHECK:   %cmp.i0 = icmp slt i32 %add.i0, -10
-; CHECK:   %cmp.i1 = icmp slt i32 %add.i1, -11
-; CHECK:   %cmp.i2 = icmp slt i32 %add.i2, -12
-; CHECK:   %cmp.i3 = icmp slt i32 %add.i3, -13
-; CHECK:   %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
-; CHECK:   %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
-; CHECK:   %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
-; CHECK:   %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
-; CHECK:   %trunc.i0 = trunc i32 %sel.i0 to i8
-; CHECK:   %trunc.i1 = trunc i32 %sel.i1 to i8
-; CHECK:   %trunc.i2 = trunc i32 %sel.i2 to i8
-; CHECK:   %trunc.i3 = trunc i32 %sel.i3 to i8
-; CHECK:   store i8 %trunc.i0, i8* %ptr.i0, align 4
-; CHECK:   store i8 %trunc.i1, i8* %ptr.i1, align 1
-; CHECK:   store i8 %trunc.i2, i8* %ptr.i2, align 2
-; CHECK:   store i8 %trunc.i3, i8* %ptr.i3, align 1
-; CHECK:   %test = icmp eq i32 %nexti, 0
-; CHECK:   br i1 %test, label %loop, label %exit
-; CHECK: exit:
-; CHECK:   ret void
+; CHECK-LABEL: @f2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0
+; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1
+; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2
+; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x i8>, <4 x i8>* [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x i8>* [[PTR]] to i8*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i8, i8* [[PTR_I0]], align 4
+; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i8, i8* [[PTR_I1]], align 1
+; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i8, i8* [[PTR_I2]], align 2
+; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i8, i8* [[PTR_I3]], align 1
+; CHECK-NEXT:    [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
+; CHECK-NEXT:    [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
+; CHECK-NEXT:    [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
+; CHECK-NEXT:    [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
+; CHECK-NEXT:    [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
+; CHECK-NEXT:    [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
+; CHECK-NEXT:    [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
+; CHECK-NEXT:    [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
+; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
+; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
+; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
+; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
+; CHECK-NEXT:    [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
+; CHECK-NEXT:    [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
+; CHECK-NEXT:    [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
+; CHECK-NEXT:    [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
+; CHECK-NEXT:    store i8 [[TRUNC_I0]], i8* [[PTR_I0]], align 4
+; CHECK-NEXT:    store i8 [[TRUNC_I1]], i8* [[PTR_I1]], align 1
+; CHECK-NEXT:    store i8 [[TRUNC_I2]], i8* [[PTR_I2]], align 2
+; CHECK-NEXT:    store i8 [[TRUNC_I3]], i8* [[PTR_I3]], align 1
+; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
+; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -157,7 +160,7 @@ loop:
   %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
   %single = insertelement <4 x i32> poison, i32 %i, i32 0
   %limit = shufflevector <4 x i32> %single, <4 x i32> poison,
-                         <4 x i32> zeroinitializer
+  <4 x i32> zeroinitializer
   %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
   %trunc = trunc <4 x i32> %sel to <4 x i8>
   store <4 x i8> %trunc, <4 x i8> *%ptr
@@ -172,15 +175,28 @@ exit:
 ; Check that !tbaa information is preserved.
 define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f3(
-; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
-; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
-; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
-; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
-; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
-; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
-; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
-; CHECK: ret void
+; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
+; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
+; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
+; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
@@ -190,15 +206,28 @@ define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
 ; Check that !tbaa.struct information is preserved.
 define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f4(
-; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
-; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
-; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
-; CHECK: ret void
+; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
+; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
+; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
+; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa.struct !5
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa.struct !5
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa.struct !5
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa.struct !5
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
@@ -208,15 +237,38 @@ define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
 ; Check that llvm.access.group information is preserved.
 define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f5(
-; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.access.group ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.access.group ![[TAG]]
-; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.access.group ![[TAG]]
-; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.access.group ![[TAG]]
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_SRC:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[SRC:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[THIS_SRC_I0:%.*]] = bitcast <4 x i32>* [[THIS_SRC]] to i32*
+; CHECK-NEXT:    [[THIS_SRC_I1:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 1
+; CHECK-NEXT:    [[THIS_SRC_I2:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 2
+; CHECK-NEXT:    [[THIS_SRC_I3:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 3
+; CHECK-NEXT:    [[THIS_DST:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[DST:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[THIS_DST_I0:%.*]] = bitcast <4 x i32>* [[THIS_DST]] to i32*
+; CHECK-NEXT:    [[THIS_DST_I1:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 1
+; CHECK-NEXT:    [[THIS_DST_I2:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 2
+; CHECK-NEXT:    [[THIS_DST_I3:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[THIS_SRC_I0]], align 16, !llvm.access.group !6
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[THIS_SRC_I1]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[THIS_SRC_I2]], align 8, !llvm.access.group !6
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[THIS_SRC_I3]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[THIS_DST_I0]], align 16, !llvm.access.group !6
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[THIS_DST_I1]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[THIS_DST_I2]], align 8, !llvm.access.group !6
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[THIS_DST_I3]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    [[NEXT_INDEX]] = add i32 [[INDEX]], -1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -238,29 +290,50 @@ end:
 ; Check that fpmath information is preserved.
 define <4 x float> @f6(<4 x float> %x) {
 ; CHECK-LABEL: @f6(
-; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
-; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
-; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
-; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
-; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
-; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
-; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
-; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
-; CHECK: %res.upto0 = insertelement <4 x float> poison, float %res.i0, i32 0
-; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
-; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
-; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
-; CHECK: ret <4 x float> %res
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2
+; CHECK-NEXT:    [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3
+; CHECK-NEXT:    [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
   %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
-    !fpmath !4
+  !fpmath !4
   ret <4 x float> %res
 }
 
 ; Check that random metadata isn't kept.
 define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f7(
-; CHECK-NOT: !foo
-; CHECK: ret void
+; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
+; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
+; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
+; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x i32> , <4 x i32> *%src, !foo !5
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !foo !5
@@ -269,26 +342,27 @@ define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
 
 ; Test GEP with vectors.
 define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
-                float *%other) {
 ; CHECK-LABEL: @f8(
-; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
-; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
-; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
-; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
-; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
-; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
-; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
-; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
-; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
-; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
-; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
-; CHECK: store float* %val.i0, float** %dest.i0, align 32
-; CHECK: store float* %val.i1, float** %dest.i1, align 8
-; CHECK: store float* %val.i2, float** %dest.i2, align 16
-; CHECK: store float* %val.i3, float** %dest.i3, align 8
-; CHECK: ret void
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[PTR0_I0:%.*]] = extractelement <4 x float*> [[PTR0:%.*]], i32 0
+; CHECK-NEXT:    [[PTR0_I2:%.*]] = extractelement <4 x float*> [[PTR0]], i32 2
+; CHECK-NEXT:    [[PTR0_I3:%.*]] = extractelement <4 x float*> [[PTR0]], i32 3
+; CHECK-NEXT:    [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1
+; CHECK-NEXT:    [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3
+; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr float, float* [[PTR0_I0]], i32 100
+; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr float, float* [[OTHER:%.*]], i32 [[I0_I1]]
+; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr float, float* [[PTR0_I2]], i32 100
+; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr float, float* [[PTR0_I3]], i32 [[I0_I3]]
+; CHECK-NEXT:    store float* [[VAL_I0]], float** [[DEST_I0]], align 32
+; CHECK-NEXT:    store float* [[VAL_I1]], float** [[DEST_I1]], align 8
+; CHECK-NEXT:    store float* [[VAL_I2]], float** [[DEST_I2]], align 16
+; CHECK-NEXT:    store float* [[VAL_I3]], float** [[DEST_I3]], align 8
+; CHECK-NEXT:    ret void
+;
+  float *%other) {
   %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
   %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
   %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
@@ -299,24 +373,25 @@ define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
 
 ; Test the handling of unaligned loads.
 define void @f9(<4 x float> *%dest, <4 x float> *%src) {
-; CHECK: @f9(
-; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
-; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
-; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
-; CHECK: %val.i0 = load float, float* %src.i0, align 4
-; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
-; CHECK: %val.i1 = load float, float* %src.i1, align 4
-; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
-; CHECK: %val.i2 = load float, float* %src.i2, align 4
-; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
-; CHECK: %val.i3 = load float, float* %src.i3, align 4
-; CHECK: store float %val.i0, float* %dest.i0, align 8
-; CHECK: store float %val.i1, float* %dest.i1, align 4
-; CHECK: store float %val.i2, float* %dest.i2, align 8
-; CHECK: store float %val.i3, float* %dest.i3, align 4
-; CHECK: ret void
+; CHECK-LABEL: @f9(
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 4
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 4
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 4
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 4
+; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 8
+; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 4
+; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 8
+; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 4
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x float> , <4 x float> *%src, align 4
   store <4 x float> %val, <4 x float> *%dest, align 8
   ret void
@@ -324,24 +399,25 @@ define void @f9(<4 x float> *%dest, <4 x float> *%src) {
 
 ; ...and again with subelement alignment.
 define void @f10(<4 x float> *%dest, <4 x float> *%src) {
-; CHECK: @f10(
-; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
-; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
-; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
-; CHECK: %val.i0 = load float, float* %src.i0, align 1
-; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
-; CHECK: %val.i1 = load float, float* %src.i1, align 1
-; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
-; CHECK: %val.i2 = load float, float* %src.i2, align 1
-; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
-; CHECK: %val.i3 = load float, float* %src.i3, align 1
-; CHECK: store float %val.i0, float* %dest.i0, align 2
-; CHECK: store float %val.i1, float* %dest.i1, align 2
-; CHECK: store float %val.i2, float* %dest.i2, align 2
-; CHECK: store float %val.i3, float* %dest.i3, align 2
-; CHECK: ret void
+; CHECK-LABEL: @f10(
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 1
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 1
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 1
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 1
+; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 2
+; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 2
+; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 2
+; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 2
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x float> , <4 x float> *%src, align 1
   store <4 x float> %val, <4 x float> *%dest, align 2
   ret void
@@ -349,11 +425,141 @@ define void @f10(<4 x float> *%dest, <4 x float> *%src) {
 
 ; Test that sub-byte loads aren't scalarized.
 define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
-; CHECK: @f11(
-; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
-; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
-; CHECK: store <32 x i1> %and, <32 x i1>* %dest
-; CHECK: ret void
+; CHECK-LABEL: @f11(
+; CHECK-NEXT:    [[SRC1:%.*]] = getelementptr <32 x i1>, <32 x i1>* [[SRC0:%.*]], i32 1
+; CHECK-NEXT:    [[VAL0:%.*]] = load <32 x i1>, <32 x i1>* [[SRC0]], align 4
+; CHECK-NEXT:    [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0
+; CHECK-NEXT:    [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1
+; CHECK-NEXT:    [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2
+; CHECK-NEXT:    [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3
+; CHECK-NEXT:    [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4
+; CHECK-NEXT:    [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5
+; CHECK-NEXT:    [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6
+; CHECK-NEXT:    [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7
+; CHECK-NEXT:    [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8
+; CHECK-NEXT:    [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9
+; CHECK-NEXT:    [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10
+; CHECK-NEXT:    [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11
+; CHECK-NEXT:    [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12
+; CHECK-NEXT:    [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13
+; CHECK-NEXT:    [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14
+; CHECK-NEXT:    [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15
+; CHECK-NEXT:    [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16
+; CHECK-NEXT:    [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17
+; CHECK-NEXT:    [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18
+; CHECK-NEXT:    [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19
+; CHECK-NEXT:    [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20
+; CHECK-NEXT:    [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21
+; CHECK-NEXT:    [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22
+; CHECK-NEXT:    [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23
+; CHECK-NEXT:    [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24
+; CHECK-NEXT:    [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25
+; CHECK-NEXT:    [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26
+; CHECK-NEXT:    [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27
+; CHECK-NEXT:    [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28
+; CHECK-NEXT:    [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29
+; CHECK-NEXT:    [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30
+; CHECK-NEXT:    [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31
+; CHECK-NEXT:    [[VAL1:%.*]] = load <32 x i1>, <32 x i1>* [[SRC1]], align 4
+; CHECK-NEXT:    [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0
+; CHECK-NEXT:    [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]]
+; CHECK-NEXT:    [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1
+; CHECK-NEXT:    [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]]
+; CHECK-NEXT:    [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2
+; CHECK-NEXT:    [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]]
+; CHECK-NEXT:    [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3
+; CHECK-NEXT:    [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]]
+; CHECK-NEXT:    [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4
+; CHECK-NEXT:    [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]]
+; CHECK-NEXT:    [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5
+; CHECK-NEXT:    [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]]
+; CHECK-NEXT:    [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6
+; CHECK-NEXT:    [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]]
+; CHECK-NEXT:    [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7
+; CHECK-NEXT:    [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]]
+; CHECK-NEXT:    [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8
+; CHECK-NEXT:    [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]]
+; CHECK-NEXT:    [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9
+; CHECK-NEXT:    [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]]
+; CHECK-NEXT:    [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10
+; CHECK-NEXT:    [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]]
+; CHECK-NEXT:    [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11
+; CHECK-NEXT:    [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]]
+; CHECK-NEXT:    [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12
+; CHECK-NEXT:    [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]]
+; CHECK-NEXT:    [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13
+; CHECK-NEXT:    [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]]
+; CHECK-NEXT:    [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14
+; CHECK-NEXT:    [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]]
+; CHECK-NEXT:    [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15
+; CHECK-NEXT:    [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]]
+; CHECK-NEXT:    [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16
+; CHECK-NEXT:    [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]]
+; CHECK-NEXT:    [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17
+; CHECK-NEXT:    [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]]
+; CHECK-NEXT:    [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i32 18
+; CHECK-NEXT:    [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]]
+; CHECK-NEXT:    [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19
+; CHECK-NEXT:    [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]]
+; CHECK-NEXT:    [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20
+; CHECK-NEXT:    [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]]
+; CHECK-NEXT:    [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21
+; CHECK-NEXT:    [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]]
+; CHECK-NEXT:    [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22
+; CHECK-NEXT:    [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]]
+; CHECK-NEXT:    [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23
+; CHECK-NEXT:    [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]]
+; CHECK-NEXT:    [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24
+; CHECK-NEXT:    [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]]
+; CHECK-NEXT:    [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25
+; CHECK-NEXT:    [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]]
+; CHECK-NEXT:    [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26
+; CHECK-NEXT:    [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]]
+; CHECK-NEXT:    [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27
+; CHECK-NEXT:    [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]]
+; CHECK-NEXT:    [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28
+; CHECK-NEXT:    [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]]
+; CHECK-NEXT:    [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29
+; CHECK-NEXT:    [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]]
+; CHECK-NEXT:    [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30
+; CHECK-NEXT:    [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]]
+; CHECK-NEXT:    [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31
+; CHECK-NEXT:    [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]]
+; CHECK-NEXT:    [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0
+; CHECK-NEXT:    [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1
+; CHECK-NEXT:    [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2
+; CHECK-NEXT:    [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3
+; CHECK-NEXT:    [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4
+; CHECK-NEXT:    [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5
+; CHECK-NEXT:    [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6
+; CHECK-NEXT:    [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7
+; CHECK-NEXT:    [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8
+; CHECK-NEXT:    [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9
+; CHECK-NEXT:    [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10
+; CHECK-NEXT:    [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11
+; CHECK-NEXT:    [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12
+; CHECK-NEXT:    [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13
+; CHECK-NEXT:    [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14
+; CHECK-NEXT:    [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15
+; CHECK-NEXT:    [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16
+; CHECK-NEXT:    [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17
+; CHECK-NEXT:    [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18
+; CHECK-NEXT:    [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19
+; CHECK-NEXT:    [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i32 20
+; CHECK-NEXT:    [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21
+; CHECK-NEXT:    [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22
+; CHECK-NEXT:    [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23
+; CHECK-NEXT:    [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24
+; CHECK-NEXT:    [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25
+; CHECK-NEXT:    [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26
+; CHECK-NEXT:    [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27
+; CHECK-NEXT:    [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28
+; CHECK-NEXT:    [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29
+; CHECK-NEXT:    [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30
+; CHECK-NEXT:    [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31
+; CHECK-NEXT:    store <32 x i1> [[AND]], <32 x i1>* [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
   %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
   %val0 = load <32 x i1> , <32 x i1> *%src0
   %val1 = load <32 x i1> , <32 x i1> *%src1
@@ -364,32 +570,33 @@ define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
 
 ; Test vector GEPs with more than one index.
 define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
-                 float *%other) {
 ; CHECK-LABEL: @f13(
-; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
-; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
-; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
-; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
-; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
-; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
-; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
-; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
-; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
-; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
-; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
-; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
-; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
-; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
-; CHECK: store float* %val.i0, float** %dest.i0, align 32
-; CHECK: store float* %val.i1, float** %dest.i1, align 8
-; CHECK: store float* %val.i2, float** %dest.i2, align 16
-; CHECK: store float* %val.i3, float** %dest.i3, align 8
-; CHECK: ret void
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[PTR_I0:%.*]] = extractelement <4 x [4 x float]*> [[PTR:%.*]], i32 0
+; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I0]], i32 0, i32 [[I_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[PTR_I1:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I1]], i32 1, i32 [[I_I1]]
+; CHECK-NEXT:    [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2
+; CHECK-NEXT:    [[PTR_I2:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I2]], i32 2, i32 [[I_I2]]
+; CHECK-NEXT:    [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3
+; CHECK-NEXT:    [[PTR_I3:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I3]], i32 3, i32 [[I_I3]]
+; CHECK-NEXT:    store float* [[VAL_I0]], float** [[DEST_I0]], align 32
+; CHECK-NEXT:    store float* [[VAL_I1]], float** [[DEST_I1]], align 8
+; CHECK-NEXT:    store float* [[VAL_I2]], float** [[DEST_I2]], align 16
+; CHECK-NEXT:    store float* [[VAL_I3]], float** [[DEST_I3]], align 8
+; CHECK-NEXT:    ret void
+;
+  float *%other) {
   %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
-                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
-                                <4 x i32> %i
+  <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
+  <4 x i32> %i
   store <4 x float *> %val, <4 x float *> *%dest
   ret void
 }
@@ -397,16 +604,41 @@ define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
 ; Test combinations of vector and non-vector PHIs.
 define <4 x float> @f14(<4 x float> %acc, i32 %count) {
 ; CHECK-LABEL: @f14(
-; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
-; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
-; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
-; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
-; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
-; CHECK: %this_acc.upto0 = insertelement <4 x float> poison, float %this_acc.i0, i32 0
-; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
-; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
-; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
-; CHECK: ret <4 x float> %next_acc
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0
+; CHECK-NEXT:    [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1
+; CHECK-NEXT:    [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2
+; CHECK-NEXT:    [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0
+; CHECK-NEXT:    [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i32 1
+; CHECK-NEXT:    [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2
+; CHECK-NEXT:    [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3
+; CHECK-NEXT:    [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
+; CHECK-NEXT:    [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0
+; CHECK-NEXT:    [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
+; CHECK-NEXT:    [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1
+; CHECK-NEXT:    [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
+; CHECK-NEXT:    [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2
+; CHECK-NEXT:    [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
+; CHECK-NEXT:    [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3
+; CHECK-NEXT:    [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
+; CHECK-NEXT:    [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0
+; CHECK-NEXT:    [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1
+; CHECK-NEXT:    [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2
+; CHECK-NEXT:    [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3
+; CHECK-NEXT:    [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret <4 x float> [[NEXT_ACC]]
+;
 entry:
   br label %loop
 
@@ -426,40 +658,50 @@ exit:
 ; Test unary operator scalarization.
 define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
 ; CHECK-LABEL: @f15(
-; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
-; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
-; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
-; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
-; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
-; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
-; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
-; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
-; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
-; CHECK: %neg.i0 = fneg float %val.i0
-; CHECK: %neg.i1 = fneg float %val.i1
-; CHECK: %neg.i2 = fneg float %val.i2
-; CHECK: %neg.i3 = fneg float %val.i3
-; CHECK: %neg.upto0 = insertelement <4 x float> poison, float %neg.i0, i32 0
-; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1
-; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2
-; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3
-; CHECK: %call = call <4 x float> @ext(<4 x float> %neg)
-; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
-; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00
-; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
-; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00
-; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
-; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00
-; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
-; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00
-; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00
-; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00
-; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00
-; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00
-; CHECK: store float %sel.i0, float* %ptr.i0, align 16
-; CHECK: store float %sel.i1, float* %ptr.i1, align 4
-; CHECK: store float %sel.i2, float* %ptr.i2, align 8
-; CHECK: store float %sel.i3, float* %ptr.i3, align 4
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
+; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
+; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
+; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
+; CHECK-NEXT:    [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
+; CHECK-NEXT:    [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
+; CHECK-NEXT:    [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
+; CHECK-NEXT:    [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0
+; CHECK-NEXT:    [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1
+; CHECK-NEXT:    [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2
+; CHECK-NEXT:    [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3
+; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
+; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
+; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
+; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
+; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
+; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
+; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
+; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
+; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
+; CHECK-NEXT:    [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
+; CHECK-NEXT:    [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
+; CHECK-NEXT:    [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
+; CHECK-NEXT:    [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
+; CHECK-NEXT:    store float [[SEL_I0]], float* [[PTR_I0]], align 16
+; CHECK-NEXT:    store float [[SEL_I1]], float* [[PTR_I1]], align 4
+; CHECK-NEXT:    store float [[SEL_I2]], float* [[PTR_I2]], align 8
+; CHECK-NEXT:    store float [[SEL_I3]], float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
+; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -488,52 +730,106 @@ exit:
 ; Check that IR flags are preserved.
 define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
 ; CHECK-LABEL: @f16(
-; CHECK: %res.i0 = add nuw nsw i32
-; CHECK: %res.i1 = add nuw nsw i32
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[RES]]
+;
   %res = add nuw nsw <2 x i32> %i, %j
   ret <2 x i32> %res
 }
 define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
 ; CHECK-LABEL: @f17(
-; CHECK: %res.i0 = sdiv exact i32
-; CHECK: %res.i1 = sdiv exact i32
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[RES]]
+;
   %res = sdiv exact <2 x i32> %i, %j
   ret <2 x i32> %res
 }
 define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @f18(
-; CHECK: %res.i0 = fadd fast float
-; CHECK: %res.i1 = fadd fast float
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = fadd fast <2 x float> %x, %y
   ret <2 x float> %res
 }
 define <2 x float> @f19(<2 x float> %x) {
 ; CHECK-LABEL: @f19(
-; CHECK: %res.i0 = fneg fast float
-; CHECK: %res.i1 = fneg fast float
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fneg fast float [[X_I0]]
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fneg fast float [[X_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = fneg fast <2 x float> %x
   ret <2 x float> %res
 }
 define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @f20(
-; CHECK: %res.i0 = fcmp fast ogt float
-; CHECK: %res.i1 = fcmp fast ogt float
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
   %res = fcmp fast ogt <2 x float> %x, %y
   ret <2 x i1> %res
 }
 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
 define <2 x float> @f21(<2 x float> %x) {
 ; CHECK-LABEL: @f21(
-; CHECK: %res.i0 = call fast float @llvm.sqrt.f32
-; CHECK: %res.i1 = call fast float @llvm.sqrt.f32
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
   ret <2 x float> %res
 }
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
 define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; CHECK-LABEL: @f22(
-; CHECK: %res.i0 = call fast float @llvm.fma.f32
-; CHECK: %res.i1 = call fast float @llvm.fma.f32
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
+; CHECK-NEXT:    [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
   ret <2 x float> %res
 }
@@ -541,10 +837,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; See https://reviews.llvm.org/D83101#2133062
 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
 ; CHECK-LABEL: @f23_crash(
-; CHECK: %v0 = extractelement <2 x i32> %srcvec, i32 0
-; CHECK: %t1.upto0 = insertelement <2 x i32> poison, i32 %v0, i32 0
-; CHECK: %t1 = insertelement <2 x i32> %t1.upto0, i32 %v1, i32 1
-; CHECK: ret <2 x i32> %t1
+; CHECK-NEXT:    [[V0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0
+; CHECK-NEXT:    [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[V0]], i32 0
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[T1]]
+;
   %v0 = extractelement <2 x i32> %srcvec, i32 0
   %t0 = insertelement <2 x i32> poison, i32 %v0, i32 0
   %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1

diff  --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
index 23eef9ec927de..9fb6db4de0b76 100644
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -6,57 +7,58 @@ declare <4 x float> @ext(<4 x float>)
 
 define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
 ; CHECK-LABEL: @f1(
-; CHECK: entry:
-; CHECK:   %init.i0 = extractelement <4 x float> %init, i32 0
-; CHECK:   %init.i1 = extractelement <4 x float> %init, i32 1
-; CHECK:   %init.i2 = extractelement <4 x float> %init, i32 2
-; CHECK:   %init.i3 = extractelement <4 x float> %init, i32 3
-; CHECK:   br label %loop
-; CHECK: loop:
-; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
-; CHECK:   %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
-; CHECK:   %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
-; CHECK:   %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
-; CHECK:   %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
-; CHECK:   %nexti = sub i32 %i, 1
-; CHECK:   %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
-; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
-; CHECK:   %val.i0 = load float, float* %ptr.i0, align 16
-; CHECK:   %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
-; CHECK:   %val.i1 = load float, float* %ptr.i1, align 4
-; CHECK:   %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
-; CHECK:   %val.i2 = load float, float* %ptr.i2, align 8
-; CHECK:   %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
-; CHECK:   %val.i3 = load float, float* %ptr.i3, align 4
-; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
-; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
-; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
-; CHECK:   %add.i3 = fadd float %acc.i1, %acc.i3
-; CHECK:   %add.upto0 = insertelement <4 x float> poison, float %add.i0, i32 0
-; CHECK:   %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
-; CHECK:   %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
-; CHECK:   %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
-; CHECK:   %call = call <4 x float> @ext(<4 x float> %add)
-; CHECK:   %call.i0 = extractelement <4 x float> %call, i32 0
-; CHECK:   %cmp.i0 = fcmp ogt float %call.i0, 1.0
-; CHECK:   %call.i1 = extractelement <4 x float> %call, i32 1
-; CHECK:   %cmp.i1 = fcmp ogt float %call.i1, 2.0
-; CHECK:   %call.i2 = extractelement <4 x float> %call, i32 2
-; CHECK:   %cmp.i2 = fcmp ogt float %call.i2, 3.0
-; CHECK:   %call.i3 = extractelement <4 x float> %call, i32 3
-; CHECK:   %cmp.i3 = fcmp ogt float %call.i3, 4.0
-; CHECK:   %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
-; CHECK:   %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
-; CHECK:   %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
-; CHECK:   %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
-; CHECK:   store float %sel.i0, float* %ptr.i0
-; CHECK:   store float %sel.i1, float* %ptr.i1
-; CHECK:   store float %sel.i2, float* %ptr.i2
-; CHECK:   store float %sel.i3, float* %ptr.i3
-; CHECK:   %test = icmp eq i32 %nexti, 0
-; CHECK:   br i1 %test, label %loop, label %exit
-; CHECK: exit:
-; CHECK:   ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0
+; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1
+; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2
+; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
+; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
+; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
+; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
+; CHECK-NEXT:    [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0
+; CHECK-NEXT:    [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1
+; CHECK-NEXT:    [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2
+; CHECK-NEXT:    [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3
+; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
+; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
+; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
+; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
+; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
+; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
+; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
+; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
+; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
+; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
+; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
+; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
+; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
+; CHECK-NEXT:    store float [[SEL_I0]], float* [[PTR_I0]], align 16
+; CHECK-NEXT:    store float [[SEL_I1]], float* [[PTR_I1]], align 4
+; CHECK-NEXT:    store float [[SEL_I2]], float* [[PTR_I2]], align 8
+; CHECK-NEXT:    store float [[SEL_I3]], float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
+; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -70,17 +72,17 @@ loop:
   %dval = bitcast <4 x float> %val to <2 x double>
   %dacc = bitcast <4 x float> %acc to <2 x double>
   %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
-                            <2 x i32> <i32 0, i32 2>
+  <2 x i32> <i32 0, i32 2>
   %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
-                            <2 x i32> <i32 1, i32 3>
+  <2 x i32> <i32 1, i32 3>
   %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
   %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
   %add = fadd <4 x float> %f1, %f2
   %call = call <4 x float> @ext(<4 x float> %add)
   %cmp = fcmp ogt <4 x float> %call,
-                  <float 1.0, float 2.0, float 3.0, float 4.0>
+  <float 1.0, float 2.0, float 3.0, float 4.0>
   %sel = select <4 x i1> %cmp, <4 x float> %call,
-                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
+  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
   store <4 x float> %sel, <4 x float> *%ptr
 
   %test = icmp eq i32 %nexti, 0
@@ -91,57 +93,58 @@ exit:
 }
 
 define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
-; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
-; CHECK: entry:
-; CHECK:   %init.i0 = extractelement <4 x i32> %init, i32 0
-; CHECK:   %init.i1 = extractelement <4 x i32> %init, i32 1
-; CHECK:   %init.i2 = extractelement <4 x i32> %init, i32 2
-; CHECK:   %init.i3 = extractelement <4 x i32> %init, i32 3
-; CHECK:   br label %loop
-; CHECK: loop:
-; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
-; CHECK:   %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
-; CHECK:   %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
-; CHECK:   %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
-; CHECK:   %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
-; CHECK:   %nexti = sub i32 %i, 1
-; CHECK:   %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
-; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
-; CHECK:   %val.i0 = load i8, i8* %ptr.i0, align 4
-; CHECK:   %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
-; CHECK:   %val.i1 = load i8, i8* %ptr.i1, align 1
-; CHECK:   %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
-; CHECK:   %val.i2 = load i8, i8* %ptr.i2, align 2
-; CHECK:   %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
-; CHECK:   %val.i3 = load i8, i8* %ptr.i3, align 1
-; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
-; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
-; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
-; CHECK:   %ext.i3 = sext i8 %val.i3 to i32
-; CHECK:   %add.i0 = add i32 %ext.i0, %acc.i0
-; CHECK:   %add.i1 = add i32 %ext.i1, %acc.i1
-; CHECK:   %add.i2 = add i32 %ext.i2, %acc.i2
-; CHECK:   %add.i3 = add i32 %ext.i3, %acc.i3
-; CHECK:   %cmp.i0 = icmp slt i32 %add.i0, -10
-; CHECK:   %cmp.i1 = icmp slt i32 %add.i1, -11
-; CHECK:   %cmp.i2 = icmp slt i32 %add.i2, -12
-; CHECK:   %cmp.i3 = icmp slt i32 %add.i3, -13
-; CHECK:   %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
-; CHECK:   %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
-; CHECK:   %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
-; CHECK:   %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
-; CHECK:   %trunc.i0 = trunc i32 %sel.i0 to i8
-; CHECK:   %trunc.i1 = trunc i32 %sel.i1 to i8
-; CHECK:   %trunc.i2 = trunc i32 %sel.i2 to i8
-; CHECK:   %trunc.i3 = trunc i32 %sel.i3 to i8
-; CHECK:   store i8 %trunc.i0, i8* %ptr.i0, align 4
-; CHECK:   store i8 %trunc.i1, i8* %ptr.i1, align 1
-; CHECK:   store i8 %trunc.i2, i8* %ptr.i2, align 2
-; CHECK:   store i8 %trunc.i3, i8* %ptr.i3, align 1
-; CHECK:   %test = icmp eq i32 %nexti, 0
-; CHECK:   br i1 %test, label %loop, label %exit
-; CHECK: exit:
-; CHECK:   ret void
+; CHECK-LABEL: @f2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0
+; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1
+; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2
+; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x i8>, <4 x i8>* [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x i8>* [[PTR]] to i8*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i8, i8* [[PTR_I0]], align 4
+; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i8, i8* [[PTR_I1]], align 1
+; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i8, i8* [[PTR_I2]], align 2
+; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i8, i8* [[PTR_I3]], align 1
+; CHECK-NEXT:    [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
+; CHECK-NEXT:    [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
+; CHECK-NEXT:    [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
+; CHECK-NEXT:    [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
+; CHECK-NEXT:    [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
+; CHECK-NEXT:    [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
+; CHECK-NEXT:    [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
+; CHECK-NEXT:    [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
+; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
+; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
+; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
+; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
+; CHECK-NEXT:    [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
+; CHECK-NEXT:    [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
+; CHECK-NEXT:    [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
+; CHECK-NEXT:    [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
+; CHECK-NEXT:    store i8 [[TRUNC_I0]], i8* [[PTR_I0]], align 4
+; CHECK-NEXT:    store i8 [[TRUNC_I1]], i8* [[PTR_I1]], align 1
+; CHECK-NEXT:    store i8 [[TRUNC_I2]], i8* [[PTR_I2]], align 2
+; CHECK-NEXT:    store i8 [[TRUNC_I3]], i8* [[PTR_I3]], align 1
+; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
+; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -157,7 +160,7 @@ loop:
   %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
   %single = insertelement <4 x i32> undef, i32 %i, i32 0
   %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
-                         <4 x i32> zeroinitializer
+  <4 x i32> zeroinitializer
   %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
   %trunc = trunc <4 x i32> %sel to <4 x i8>
   store <4 x i8> %trunc, <4 x i8> *%ptr
@@ -172,15 +175,28 @@ exit:
 ; Check that !tbaa information is preserved.
 define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f3(
-; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
-; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
-; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
-; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
-; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
-; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
-; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
-; CHECK: ret void
+; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
+; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
+; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
+; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
@@ -190,15 +206,28 @@ define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
 ; Check that !tbaa.struct information is preserved.
 define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f4(
-; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
-; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
-; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
-; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
-; CHECK: ret void
+; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
+; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
+; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
+; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa.struct !5
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa.struct !5
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa.struct !5
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa.struct !5
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa.struct !5
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
@@ -208,15 +237,38 @@ define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
 ; Check that llvm.access.group information is preserved.
 define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f5(
-; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.access.group ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.access.group ![[TAG]]
-; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.access.group ![[TAG]]
-; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.access.group ![[TAG]]
-; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.access.group ![[TAG]]
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_SRC:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[SRC:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[THIS_SRC_I0:%.*]] = bitcast <4 x i32>* [[THIS_SRC]] to i32*
+; CHECK-NEXT:    [[THIS_SRC_I1:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 1
+; CHECK-NEXT:    [[THIS_SRC_I2:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 2
+; CHECK-NEXT:    [[THIS_SRC_I3:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 3
+; CHECK-NEXT:    [[THIS_DST:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[DST:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[THIS_DST_I0:%.*]] = bitcast <4 x i32>* [[THIS_DST]] to i32*
+; CHECK-NEXT:    [[THIS_DST_I1:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 1
+; CHECK-NEXT:    [[THIS_DST_I2:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 2
+; CHECK-NEXT:    [[THIS_DST_I3:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[THIS_SRC_I0]], align 16, !llvm.access.group !6
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[THIS_SRC_I1]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[THIS_SRC_I2]], align 8, !llvm.access.group !6
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[THIS_SRC_I3]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[THIS_DST_I0]], align 16, !llvm.access.group !6
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[THIS_DST_I1]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[THIS_DST_I2]], align 8, !llvm.access.group !6
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[THIS_DST_I3]], align 4, !llvm.access.group !6
+; CHECK-NEXT:    [[NEXT_INDEX]] = add i32 [[INDEX]], -1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -238,29 +290,50 @@ end:
 ; Check that fpmath information is preserved.
 define <4 x float> @f6(<4 x float> %x) {
 ; CHECK-LABEL: @f6(
-; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
-; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
-; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
-; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
-; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
-; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
-; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
-; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
-; CHECK: %res.upto0 = insertelement <4 x float> poison, float %res.i0, i32 0
-; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
-; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
-; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
-; CHECK: ret <4 x float> %res
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2
+; CHECK-NEXT:    [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3
+; CHECK-NEXT:    [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
   %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
-    !fpmath !4
+  !fpmath !4
   ret <4 x float> %res
 }
 
 ; Check that random metadata isn't kept.
 define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f7(
-; CHECK-NOT: !foo
-; CHECK: ret void
+; CHECK-NEXT:    [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
+; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
+; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
+; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
+; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
+; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
+; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
+; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
+; CHECK-NEXT:    store i32 [[ADD_I0]], i32* [[DST_I0]], align 16
+; CHECK-NEXT:    store i32 [[ADD_I1]], i32* [[DST_I1]], align 4
+; CHECK-NEXT:    store i32 [[ADD_I2]], i32* [[DST_I2]], align 8
+; CHECK-NEXT:    store i32 [[ADD_I3]], i32* [[DST_I3]], align 4
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x i32> , <4 x i32> *%src, !foo !5
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !foo !5
@@ -269,26 +342,27 @@ define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
 
 ; Test GEP with vectors.
 define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
-                float *%other) {
 ; CHECK-LABEL: @f8(
-; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
-; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
-; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
-; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
-; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
-; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
-; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
-; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
-; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
-; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
-; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
-; CHECK: store float* %val.i0, float** %dest.i0, align 32
-; CHECK: store float* %val.i1, float** %dest.i1, align 8
-; CHECK: store float* %val.i2, float** %dest.i2, align 16
-; CHECK: store float* %val.i3, float** %dest.i3, align 8
-; CHECK: ret void
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[PTR0_I0:%.*]] = extractelement <4 x float*> [[PTR0:%.*]], i32 0
+; CHECK-NEXT:    [[PTR0_I2:%.*]] = extractelement <4 x float*> [[PTR0]], i32 2
+; CHECK-NEXT:    [[PTR0_I3:%.*]] = extractelement <4 x float*> [[PTR0]], i32 3
+; CHECK-NEXT:    [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1
+; CHECK-NEXT:    [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3
+; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr float, float* [[PTR0_I0]], i32 100
+; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr float, float* [[OTHER:%.*]], i32 [[I0_I1]]
+; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr float, float* [[PTR0_I2]], i32 100
+; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr float, float* [[PTR0_I3]], i32 [[I0_I3]]
+; CHECK-NEXT:    store float* [[VAL_I0]], float** [[DEST_I0]], align 32
+; CHECK-NEXT:    store float* [[VAL_I1]], float** [[DEST_I1]], align 8
+; CHECK-NEXT:    store float* [[VAL_I2]], float** [[DEST_I2]], align 16
+; CHECK-NEXT:    store float* [[VAL_I3]], float** [[DEST_I3]], align 8
+; CHECK-NEXT:    ret void
+;
+  float *%other) {
   %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
   %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
   %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
@@ -299,24 +373,25 @@ define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
 
 ; Test the handling of unaligned loads.
 define void @f9(<4 x float> *%dest, <4 x float> *%src) {
-; CHECK: @f9(
-; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
-; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
-; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
-; CHECK: %val.i0 = load float, float* %src.i0, align 4
-; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
-; CHECK: %val.i1 = load float, float* %src.i1, align 4
-; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
-; CHECK: %val.i2 = load float, float* %src.i2, align 4
-; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
-; CHECK: %val.i3 = load float, float* %src.i3, align 4
-; CHECK: store float %val.i0, float* %dest.i0, align 8
-; CHECK: store float %val.i1, float* %dest.i1, align 4
-; CHECK: store float %val.i2, float* %dest.i2, align 8
-; CHECK: store float %val.i3, float* %dest.i3, align 4
-; CHECK: ret void
+; CHECK-LABEL: @f9(
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 4
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 4
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 4
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 4
+; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 8
+; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 4
+; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 8
+; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 4
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x float> , <4 x float> *%src, align 4
   store <4 x float> %val, <4 x float> *%dest, align 8
   ret void
@@ -324,24 +399,25 @@ define void @f9(<4 x float> *%dest, <4 x float> *%src) {
 
 ; ...and again with subelement alignment.
 define void @f10(<4 x float> *%dest, <4 x float> *%src) {
-; CHECK: @f10(
-; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
-; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
-; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
-; CHECK: %val.i0 = load float, float* %src.i0, align 1
-; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
-; CHECK: %val.i1 = load float, float* %src.i1, align 1
-; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
-; CHECK: %val.i2 = load float, float* %src.i2, align 1
-; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
-; CHECK: %val.i3 = load float, float* %src.i3, align 1
-; CHECK: store float %val.i0, float* %dest.i0, align 2
-; CHECK: store float %val.i1, float* %dest.i1, align 2
-; CHECK: store float %val.i2, float* %dest.i2, align 2
-; CHECK: store float %val.i3, float* %dest.i3, align 2
-; CHECK: ret void
+; CHECK-LABEL: @f10(
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 1
+; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 1
+; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 1
+; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 1
+; CHECK-NEXT:    store float [[VAL_I0]], float* [[DEST_I0]], align 2
+; CHECK-NEXT:    store float [[VAL_I1]], float* [[DEST_I1]], align 2
+; CHECK-NEXT:    store float [[VAL_I2]], float* [[DEST_I2]], align 2
+; CHECK-NEXT:    store float [[VAL_I3]], float* [[DEST_I3]], align 2
+; CHECK-NEXT:    ret void
+;
   %val = load <4 x float> , <4 x float> *%src, align 1
   store <4 x float> %val, <4 x float> *%dest, align 2
   ret void
@@ -349,11 +425,141 @@ define void @f10(<4 x float> *%dest, <4 x float> *%src) {
 
 ; Test that sub-byte loads aren't scalarized.
 define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
-; CHECK: @f11(
-; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
-; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
-; CHECK: store <32 x i1> %and, <32 x i1>* %dest
-; CHECK: ret void
+; CHECK-LABEL: @f11(
+; CHECK-NEXT:    [[SRC1:%.*]] = getelementptr <32 x i1>, <32 x i1>* [[SRC0:%.*]], i32 1
+; CHECK-NEXT:    [[VAL0:%.*]] = load <32 x i1>, <32 x i1>* [[SRC0]], align 4
+; CHECK-NEXT:    [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0
+; CHECK-NEXT:    [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1
+; CHECK-NEXT:    [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2
+; CHECK-NEXT:    [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3
+; CHECK-NEXT:    [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4
+; CHECK-NEXT:    [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5
+; CHECK-NEXT:    [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6
+; CHECK-NEXT:    [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7
+; CHECK-NEXT:    [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8
+; CHECK-NEXT:    [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9
+; CHECK-NEXT:    [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10
+; CHECK-NEXT:    [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11
+; CHECK-NEXT:    [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12
+; CHECK-NEXT:    [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13
+; CHECK-NEXT:    [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14
+; CHECK-NEXT:    [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15
+; CHECK-NEXT:    [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16
+; CHECK-NEXT:    [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17
+; CHECK-NEXT:    [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18
+; CHECK-NEXT:    [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19
+; CHECK-NEXT:    [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20
+; CHECK-NEXT:    [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21
+; CHECK-NEXT:    [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22
+; CHECK-NEXT:    [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23
+; CHECK-NEXT:    [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24
+; CHECK-NEXT:    [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25
+; CHECK-NEXT:    [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26
+; CHECK-NEXT:    [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27
+; CHECK-NEXT:    [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28
+; CHECK-NEXT:    [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29
+; CHECK-NEXT:    [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30
+; CHECK-NEXT:    [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31
+; CHECK-NEXT:    [[VAL1:%.*]] = load <32 x i1>, <32 x i1>* [[SRC1]], align 4
+; CHECK-NEXT:    [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0
+; CHECK-NEXT:    [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]]
+; CHECK-NEXT:    [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1
+; CHECK-NEXT:    [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]]
+; CHECK-NEXT:    [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2
+; CHECK-NEXT:    [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]]
+; CHECK-NEXT:    [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3
+; CHECK-NEXT:    [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]]
+; CHECK-NEXT:    [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4
+; CHECK-NEXT:    [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]]
+; CHECK-NEXT:    [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5
+; CHECK-NEXT:    [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]]
+; CHECK-NEXT:    [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6
+; CHECK-NEXT:    [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]]
+; CHECK-NEXT:    [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7
+; CHECK-NEXT:    [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]]
+; CHECK-NEXT:    [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8
+; CHECK-NEXT:    [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]]
+; CHECK-NEXT:    [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9
+; CHECK-NEXT:    [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]]
+; CHECK-NEXT:    [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10
+; CHECK-NEXT:    [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]]
+; CHECK-NEXT:    [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11
+; CHECK-NEXT:    [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]]
+; CHECK-NEXT:    [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12
+; CHECK-NEXT:    [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]]
+; CHECK-NEXT:    [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13
+; CHECK-NEXT:    [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]]
+; CHECK-NEXT:    [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14
+; CHECK-NEXT:    [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]]
+; CHECK-NEXT:    [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15
+; CHECK-NEXT:    [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]]
+; CHECK-NEXT:    [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16
+; CHECK-NEXT:    [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]]
+; CHECK-NEXT:    [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17
+; CHECK-NEXT:    [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]]
+; CHECK-NEXT:    [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i32 18
+; CHECK-NEXT:    [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]]
+; CHECK-NEXT:    [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19
+; CHECK-NEXT:    [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]]
+; CHECK-NEXT:    [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20
+; CHECK-NEXT:    [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]]
+; CHECK-NEXT:    [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21
+; CHECK-NEXT:    [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]]
+; CHECK-NEXT:    [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22
+; CHECK-NEXT:    [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]]
+; CHECK-NEXT:    [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23
+; CHECK-NEXT:    [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]]
+; CHECK-NEXT:    [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24
+; CHECK-NEXT:    [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]]
+; CHECK-NEXT:    [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25
+; CHECK-NEXT:    [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]]
+; CHECK-NEXT:    [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26
+; CHECK-NEXT:    [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]]
+; CHECK-NEXT:    [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27
+; CHECK-NEXT:    [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]]
+; CHECK-NEXT:    [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28
+; CHECK-NEXT:    [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]]
+; CHECK-NEXT:    [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29
+; CHECK-NEXT:    [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]]
+; CHECK-NEXT:    [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30
+; CHECK-NEXT:    [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]]
+; CHECK-NEXT:    [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31
+; CHECK-NEXT:    [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]]
+; CHECK-NEXT:    [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0
+; CHECK-NEXT:    [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1
+; CHECK-NEXT:    [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2
+; CHECK-NEXT:    [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3
+; CHECK-NEXT:    [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4
+; CHECK-NEXT:    [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5
+; CHECK-NEXT:    [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6
+; CHECK-NEXT:    [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7
+; CHECK-NEXT:    [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8
+; CHECK-NEXT:    [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9
+; CHECK-NEXT:    [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10
+; CHECK-NEXT:    [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11
+; CHECK-NEXT:    [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12
+; CHECK-NEXT:    [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13
+; CHECK-NEXT:    [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14
+; CHECK-NEXT:    [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15
+; CHECK-NEXT:    [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16
+; CHECK-NEXT:    [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17
+; CHECK-NEXT:    [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18
+; CHECK-NEXT:    [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19
+; CHECK-NEXT:    [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i32 20
+; CHECK-NEXT:    [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21
+; CHECK-NEXT:    [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22
+; CHECK-NEXT:    [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23
+; CHECK-NEXT:    [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24
+; CHECK-NEXT:    [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25
+; CHECK-NEXT:    [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26
+; CHECK-NEXT:    [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27
+; CHECK-NEXT:    [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28
+; CHECK-NEXT:    [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29
+; CHECK-NEXT:    [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30
+; CHECK-NEXT:    [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31
+; CHECK-NEXT:    store <32 x i1> [[AND]], <32 x i1>* [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
   %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
   %val0 = load <32 x i1> , <32 x i1> *%src0
   %val1 = load <32 x i1> , <32 x i1> *%src1
@@ -364,32 +570,33 @@ define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
 
 ; Test vector GEPs with more than one index.
 define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
-                 float *%other) {
 ; CHECK-LABEL: @f13(
-; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
-; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
-; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
-; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
-; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
-; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
-; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
-; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
-; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
-; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
-; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
-; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
-; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
-; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
-; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
-; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
-; CHECK: store float* %val.i0, float** %dest.i0, align 32
-; CHECK: store float* %val.i1, float** %dest.i1, align 8
-; CHECK: store float* %val.i2, float** %dest.i2, align 16
-; CHECK: store float* %val.i3, float** %dest.i3, align 8
-; CHECK: ret void
+; CHECK-NEXT:    [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
+; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
+; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
+; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[PTR_I0:%.*]] = extractelement <4 x [4 x float]*> [[PTR:%.*]], i32 0
+; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I0]], i32 0, i32 [[I_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[PTR_I1:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I1]], i32 1, i32 [[I_I1]]
+; CHECK-NEXT:    [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2
+; CHECK-NEXT:    [[PTR_I2:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I2]], i32 2, i32 [[I_I2]]
+; CHECK-NEXT:    [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3
+; CHECK-NEXT:    [[PTR_I3:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I3]], i32 3, i32 [[I_I3]]
+; CHECK-NEXT:    store float* [[VAL_I0]], float** [[DEST_I0]], align 32
+; CHECK-NEXT:    store float* [[VAL_I1]], float** [[DEST_I1]], align 8
+; CHECK-NEXT:    store float* [[VAL_I2]], float** [[DEST_I2]], align 16
+; CHECK-NEXT:    store float* [[VAL_I3]], float** [[DEST_I3]], align 8
+; CHECK-NEXT:    ret void
+;
+  float *%other) {
   %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
-                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
-                                <4 x i32> %i
+  <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
+  <4 x i32> %i
   store <4 x float *> %val, <4 x float *> *%dest
   ret void
 }
@@ -397,16 +604,41 @@ define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
 ; Test combinations of vector and non-vector PHIs.
 define <4 x float> @f14(<4 x float> %acc, i32 %count) {
 ; CHECK-LABEL: @f14(
-; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
-; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
-; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
-; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
-; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
-; CHECK: %this_acc.upto0 = insertelement <4 x float> poison, float %this_acc.i0, i32 0
-; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
-; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
-; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
-; CHECK: ret <4 x float> %next_acc
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0
+; CHECK-NEXT:    [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1
+; CHECK-NEXT:    [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2
+; CHECK-NEXT:    [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0
+; CHECK-NEXT:    [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i32 1
+; CHECK-NEXT:    [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2
+; CHECK-NEXT:    [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3
+; CHECK-NEXT:    [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
+; CHECK-NEXT:    [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0
+; CHECK-NEXT:    [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
+; CHECK-NEXT:    [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1
+; CHECK-NEXT:    [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
+; CHECK-NEXT:    [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2
+; CHECK-NEXT:    [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
+; CHECK-NEXT:    [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3
+; CHECK-NEXT:    [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
+; CHECK-NEXT:    [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0
+; CHECK-NEXT:    [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1
+; CHECK-NEXT:    [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2
+; CHECK-NEXT:    [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3
+; CHECK-NEXT:    [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret <4 x float> [[NEXT_ACC]]
+;
 entry:
   br label %loop
 
@@ -426,40 +658,50 @@ exit:
 ; Test unary operator scalarization.
 define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
 ; CHECK-LABEL: @f15(
-; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
-; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
-; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
-; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
-; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
-; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
-; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
-; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
-; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
-; CHECK: %neg.i0 = fneg float %val.i0
-; CHECK: %neg.i1 = fneg float %val.i1
-; CHECK: %neg.i2 = fneg float %val.i2
-; CHECK: %neg.i3 = fneg float %val.i3
-; CHECK: %neg.upto0 = insertelement <4 x float> poison, float %neg.i0, i32 0
-; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1
-; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2
-; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3
-; CHECK: %call = call <4 x float> @ext(<4 x float> %neg)
-; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
-; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00
-; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
-; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00
-; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
-; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00
-; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
-; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00
-; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00
-; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00
-; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00
-; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00
-; CHECK: store float %sel.i0, float* %ptr.i0, align 16
-; CHECK: store float %sel.i1, float* %ptr.i1, align 4
-; CHECK: store float %sel.i2, float* %ptr.i2, align 8
-; CHECK: store float %sel.i3, float* %ptr.i3, align 4
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
+; CHECK-NEXT:    [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
+; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
+; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
+; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
+; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
+; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
+; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
+; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
+; CHECK-NEXT:    [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
+; CHECK-NEXT:    [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
+; CHECK-NEXT:    [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
+; CHECK-NEXT:    [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0
+; CHECK-NEXT:    [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1
+; CHECK-NEXT:    [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2
+; CHECK-NEXT:    [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3
+; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
+; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
+; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
+; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
+; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
+; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
+; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
+; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
+; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
+; CHECK-NEXT:    [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
+; CHECK-NEXT:    [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
+; CHECK-NEXT:    [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
+; CHECK-NEXT:    [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
+; CHECK-NEXT:    store float [[SEL_I0]], float* [[PTR_I0]], align 16
+; CHECK-NEXT:    store float [[SEL_I1]], float* [[PTR_I1]], align 4
+; CHECK-NEXT:    store float [[SEL_I2]], float* [[PTR_I2]], align 8
+; CHECK-NEXT:    store float [[SEL_I3]], float* [[PTR_I3]], align 4
+; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
+; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -488,52 +730,106 @@ exit:
 ; Check that IR flags are preserved.
 define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
 ; CHECK-LABEL: @f16(
-; CHECK: %res.i0 = add nuw nsw i32
-; CHECK: %res.i1 = add nuw nsw i32
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[RES]]
+;
   %res = add nuw nsw <2 x i32> %i, %j
   ret <2 x i32> %res
 }
 define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
 ; CHECK-LABEL: @f17(
-; CHECK: %res.i0 = sdiv exact i32
-; CHECK: %res.i1 = sdiv exact i32
+; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
+; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
+; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
+; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[RES]]
+;
   %res = sdiv exact <2 x i32> %i, %j
   ret <2 x i32> %res
 }
 define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @f18(
-; CHECK: %res.i0 = fadd fast float
-; CHECK: %res.i1 = fadd fast float
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = fadd fast <2 x float> %x, %y
   ret <2 x float> %res
 }
 define <2 x float> @f19(<2 x float> %x) {
 ; CHECK-LABEL: @f19(
-; CHECK: %res.i0 = fneg fast float
-; CHECK: %res.i1 = fneg fast float
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fneg fast float [[X_I0]]
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fneg fast float [[X_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = fneg fast <2 x float> %x
   ret <2 x float> %res
 }
 define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @f20(
-; CHECK: %res.i0 = fcmp fast ogt float
-; CHECK: %res.i1 = fcmp fast ogt float
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
   %res = fcmp fast ogt <2 x float> %x, %y
   ret <2 x i1> %res
 }
 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
 define <2 x float> @f21(<2 x float> %x) {
 ; CHECK-LABEL: @f21(
-; CHECK: %res.i0 = call fast float @llvm.sqrt.f32
-; CHECK: %res.i1 = call fast float @llvm.sqrt.f32
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
   ret <2 x float> %res
 }
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
 define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; CHECK-LABEL: @f22(
-; CHECK: %res.i0 = call fast float @llvm.fma.f32
-; CHECK: %res.i1 = call fast float @llvm.fma.f32
+; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0
+; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
+; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
+; CHECK-NEXT:    [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1
+; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
+; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[RES]]
+;
   %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
   ret <2 x float> %res
 }
@@ -541,10 +837,11 @@ define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; See https://reviews.llvm.org/D83101#2133062
 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
 ; CHECK-LABEL: @f23_crash(
-; CHECK: %v0 = extractelement <2 x i32> %srcvec, i32 0
-; CHECK: %t1.upto0 = insertelement <2 x i32> poison, i32 %v0, i32 0
-; CHECK: %t1 = insertelement <2 x i32> %t1.upto0, i32 %v1, i32 1
-; CHECK: ret <2 x i32> %t1
+; CHECK-NEXT:    [[V0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0
+; CHECK-NEXT:    [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[V0]], i32 0
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[T1]]
+;
   %v0 = extractelement <2 x i32> %srcvec, i32 0
   %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
   %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1


        


More information about the llvm-commits mailing list